From 8e56a84566036cfff0aa5c3d80ae1b051d2bd0bf Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sun, 23 Apr 2023 00:01:08 -0400 Subject: core_timing: Use CNTPCT as the guest CPU tick Previously, we were mixing the raw CPU frequency and CNTFRQ. The raw CPU frequency (1020 MHz) should've never been used as CNTPCT (whose frequency is CNTFRQ) is the only counter available. --- .../renderer/command/performance/performance.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'src/audio_core/renderer/command') diff --git a/src/audio_core/renderer/command/performance/performance.cpp b/src/audio_core/renderer/command/performance/performance.cpp index 985958b03..4a881547f 100644 --- a/src/audio_core/renderer/command/performance/performance.cpp +++ b/src/audio_core/renderer/command/performance/performance.cpp @@ -5,7 +5,6 @@ #include "audio_core/renderer/command/performance/performance.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" namespace AudioCore::AudioRenderer { @@ -18,20 +17,18 @@ void PerformanceCommand::Process(const ADSP::CommandListProcessor& processor) { auto base{entry_address.translated_address}; if (state == PerformanceState::Start) { auto start_time_ptr{reinterpret_cast(base + entry_address.entry_start_time_offset)}; - *start_time_ptr = static_cast( - Core::Timing::CyclesToUs(processor.system->CoreTiming().GetClockTicks() - - processor.start_time - processor.current_processing_time) - .count()); + *start_time_ptr = + static_cast(processor.system->CoreTiming().GetClockTicks() - processor.start_time - + processor.current_processing_time); } else if (state == PerformanceState::Stop) { auto processed_time_ptr{ reinterpret_cast(base + entry_address.entry_processed_time_offset)}; auto entry_count_ptr{ reinterpret_cast(base + entry_address.header_entry_count_offset)}; - *processed_time_ptr = static_cast( - 
Core::Timing::CyclesToUs(processor.system->CoreTiming().GetClockTicks() - - processor.start_time - processor.current_processing_time) - .count()); + *processed_time_ptr = + static_cast(processor.system->CoreTiming().GetClockTicks() - processor.start_time - + processor.current_processing_time); (*entry_count_ptr)++; } } -- cgit v1.2.3 From 5da70f719703084482933e103e561cc98163f370 Mon Sep 17 00:00:00 2001 From: Kelebek1 Date: Tue, 23 May 2023 14:45:54 +0100 Subject: Remove memory allocations in some hot paths --- src/audio_core/device/audio_buffers.h | 8 +-- src/audio_core/device/device_session.cpp | 12 ++--- src/audio_core/device/device_session.h | 7 +-- src/audio_core/in/audio_in_system.cpp | 5 +- src/audio_core/out/audio_out_system.cpp | 4 +- .../renderer/command/data_source/decode.cpp | 23 ++++----- .../renderer/command/effect/compressor.cpp | 8 +-- src/audio_core/renderer/command/effect/delay.cpp | 14 ++--- .../renderer/command/effect/i3dl2_reverb.cpp | 4 +- .../renderer/command/effect/light_limiter.cpp | 12 ++--- src/audio_core/renderer/command/effect/reverb.cpp | 12 ++--- .../renderer/command/sink/circular_buffer.cpp | 4 +- src/audio_core/renderer/command/sink/device.cpp | 5 +- src/audio_core/renderer/mix/mix_context.cpp | 6 +-- src/audio_core/renderer/nodes/node_states.cpp | 4 +- src/audio_core/renderer/nodes/node_states.h | 2 +- src/audio_core/renderer/system.cpp | 1 + src/audio_core/sink/null_sink.h | 2 +- src/audio_core/sink/sink_stream.cpp | 15 +++--- src/audio_core/sink/sink_stream.h | 5 +- src/common/ring_buffer.h | 3 +- src/common/scratch_buffer.h | 9 ++++ src/core/hle/kernel/k_synchronization_object.cpp | 3 +- src/core/hle/kernel/k_thread.cpp | 8 ++- src/core/hle/kernel/k_thread.h | 3 +- src/core/hle/kernel/svc/svc_ipc.cpp | 7 +-- src/core/hle/kernel/svc/svc_synchronization.cpp | 10 ++-- src/core/hle/kernel/svc/svc_thread.cpp | 2 +- src/core/hle/service/audio/audin_u.cpp | 16 +++--- src/core/hle/service/audio/audout_u.cpp | 15 ++---- 
src/core/hle/service/audio/audren_u.cpp | 22 ++++---- src/core/hle/service/audio/audren_u.h | 1 + src/core/hle/service/audio/hwopus.cpp | 9 ++-- src/core/hle/service/nvdrv/devices/nvdevice.h | 6 +-- .../hle/service/nvdrv/devices/nvdisp_disp0.cpp | 6 +-- src/core/hle/service/nvdrv/devices/nvdisp_disp0.h | 8 +-- .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 31 ++++++------ src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | 30 ++++++----- src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | 19 ++++--- src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | 21 ++++---- .../hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp | 32 ++++++------ .../hle/service/nvdrv/devices/nvhost_ctrl_gpu.h | 38 +++++++------- src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 59 +++++++++++----------- src/core/hle/service/nvdrv/devices/nvhost_gpu.h | 36 ++++++------- .../hle/service/nvdrv/devices/nvhost_nvdec.cpp | 6 +-- src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | 8 +-- .../service/nvdrv/devices/nvhost_nvdec_common.cpp | 15 +++--- .../service/nvdrv/devices/nvhost_nvdec_common.h | 12 ++--- .../hle/service/nvdrv/devices/nvhost_nvjpg.cpp | 8 +-- src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h | 10 ++-- src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 6 +-- src/core/hle/service/nvdrv/devices/nvhost_vic.h | 8 +-- src/core/hle/service/nvdrv/devices/nvmap.cpp | 20 ++++---- src/core/hle/service/nvdrv/devices/nvmap.h | 20 ++++---- src/core/hle/service/nvdrv/nvdrv.cpp | 8 +-- src/core/hle/service/nvdrv/nvdrv.h | 8 +-- src/core/hle/service/nvdrv/nvdrv_interface.cpp | 24 ++++----- src/core/hle/service/nvdrv/nvdrv_interface.h | 3 ++ src/core/hle/service/nvnflinger/parcel.h | 7 +-- .../backend/glsl/glsl_emit_context.cpp | 2 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 2 +- .../backend/spirv/spirv_emit_context.cpp | 2 +- src/shader_recompiler/runtime_info.h | 3 +- src/video_core/buffer_cache/buffer_cache.h | 4 +- src/video_core/buffer_cache/buffer_cache_base.h | 4 +- 
src/video_core/cdma_pusher.h | 1 - src/video_core/dma_pusher.h | 8 +-- src/video_core/engines/maxwell_dma.cpp | 35 +++++++------ src/video_core/host1x/codecs/h264.cpp | 4 +- src/video_core/memory_manager.cpp | 13 ++--- src/video_core/memory_manager.h | 15 ++++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 4 +- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 10 +++- .../renderer_vulkan/vk_texture_cache.cpp | 27 +++++----- src/video_core/shader_cache.cpp | 4 +- src/video_core/texture_cache/image_base.h | 5 +- src/video_core/texture_cache/texture_cache.h | 14 ++--- src/video_core/texture_cache/texture_cache_base.h | 4 +- src/video_core/texture_cache/util.cpp | 48 ++++++++++-------- src/video_core/texture_cache/util.h | 31 ++++++------ src/video_core/transform_feedback.cpp | 8 +-- src/video_core/transform_feedback.h | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 1 + 84 files changed, 503 insertions(+), 460 deletions(-) (limited to 'src/audio_core/renderer/command') diff --git a/src/audio_core/device/audio_buffers.h b/src/audio_core/device/audio_buffers.h index 15082f6c6..5d8ed0ef7 100644 --- a/src/audio_core/device/audio_buffers.h +++ b/src/audio_core/device/audio_buffers.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "audio_buffer.h" #include "audio_core/device/device_session.h" @@ -48,7 +49,7 @@ public: * * @param out_buffers - The buffers which were registered. */ - void RegisterBuffers(std::vector& out_buffers) { + void RegisterBuffers(boost::container::static_vector& out_buffers) { std::scoped_lock l{lock}; const s32 to_register{std::min(std::min(appended_count, BufferAppendLimit), BufferAppendLimit - registered_count)}; @@ -162,7 +163,8 @@ public: * @param max_buffers - Maximum number of buffers to released. * @return The number of buffers released. 
*/ - u32 GetRegisteredAppendedBuffers(std::vector& buffers_flushed, u32 max_buffers) { + u32 GetRegisteredAppendedBuffers( + boost::container::static_vector& buffers_flushed, u32 max_buffers) { std::scoped_lock l{lock}; if (registered_count + appended_count == 0) { return 0; @@ -270,7 +272,7 @@ public: */ bool FlushBuffers(u32& buffers_released) { std::scoped_lock l{lock}; - std::vector buffers_flushed{}; + boost::container::static_vector buffers_flushed{}; buffers_released = GetRegisteredAppendedBuffers(buffers_flushed, append_limit); diff --git a/src/audio_core/device/device_session.cpp b/src/audio_core/device/device_session.cpp index b5c0ef0e6..86811fcb8 100644 --- a/src/audio_core/device/device_session.cpp +++ b/src/audio_core/device/device_session.cpp @@ -79,7 +79,7 @@ void DeviceSession::ClearBuffers() { } } -void DeviceSession::AppendBuffers(std::span buffers) const { +void DeviceSession::AppendBuffers(std::span buffers) { for (const auto& buffer : buffers) { Sink::SinkBuffer new_buffer{ .frames = buffer.size / (channel_count * sizeof(s16)), @@ -88,13 +88,13 @@ void DeviceSession::AppendBuffers(std::span buffers) const { .consumed = false, }; + tmp_samples.resize_destructive(buffer.size / sizeof(s16)); if (type == Sink::StreamType::In) { - std::vector samples{}; - stream->AppendBuffer(new_buffer, samples); + stream->AppendBuffer(new_buffer, tmp_samples); } else { - std::vector samples(buffer.size / sizeof(s16)); - system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, samples.data(), buffer.size); - stream->AppendBuffer(new_buffer, samples); + system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, tmp_samples.data(), + buffer.size); + stream->AppendBuffer(new_buffer, tmp_samples); } } } diff --git a/src/audio_core/device/device_session.h b/src/audio_core/device/device_session.h index 75f766c68..7d52f362d 100644 --- a/src/audio_core/device/device_session.h +++ b/src/audio_core/device/device_session.h @@ -10,6 +10,7 @@ #include 
"audio_core/common/common.h" #include "audio_core/sink/sink.h" +#include "common/scratch_buffer.h" #include "core/hle/service/audio/errors.h" namespace Core { @@ -62,7 +63,7 @@ public: * * @param buffers - The buffers to play. */ - void AppendBuffers(std::span buffers) const; + void AppendBuffers(std::span buffers); /** * (Audio In only) Pop samples from the backend, and write them back to this buffer's address. @@ -146,8 +147,8 @@ private: std::shared_ptr thread_event; /// Is this session initialised? bool initialized{}; - /// Buffer queue - std::vector buffer_queue{}; + /// Temporary sample buffer + Common::ScratchBuffer tmp_samples{}; }; } // namespace AudioCore diff --git a/src/audio_core/in/audio_in_system.cpp b/src/audio_core/in/audio_in_system.cpp index e23e51758..579129121 100644 --- a/src/audio_core/in/audio_in_system.cpp +++ b/src/audio_core/in/audio_in_system.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include + #include "audio_core/audio_event.h" #include "audio_core/audio_manager.h" #include "audio_core/in/audio_in_system.h" @@ -89,7 +90,7 @@ Result System::Start() { session->Start(); state = State::Started; - std::vector buffers_to_flush{}; + boost::container::static_vector buffers_to_flush{}; buffers.RegisterBuffers(buffers_to_flush); session->AppendBuffers(buffers_to_flush); session->SetRingSize(static_cast(buffers_to_flush.size())); @@ -134,7 +135,7 @@ bool System::AppendBuffer(const AudioInBuffer& buffer, const u64 tag) { void System::RegisterBuffers() { if (state == State::Started) { - std::vector registered_buffers{}; + boost::container::static_vector registered_buffers{}; buffers.RegisterBuffers(registered_buffers); session->AppendBuffers(registered_buffers); } diff --git a/src/audio_core/out/audio_out_system.cpp b/src/audio_core/out/audio_out_system.cpp index bd13f7219..0adf64bd3 100644 --- a/src/audio_core/out/audio_out_system.cpp +++ b/src/audio_core/out/audio_out_system.cpp @@ -89,7 +89,7 @@ Result System::Start() { 
session->Start(); state = State::Started; - std::vector buffers_to_flush{}; + boost::container::static_vector buffers_to_flush{}; buffers.RegisterBuffers(buffers_to_flush); session->AppendBuffers(buffers_to_flush); session->SetRingSize(static_cast(buffers_to_flush.size())); @@ -134,7 +134,7 @@ bool System::AppendBuffer(const AudioOutBuffer& buffer, u64 tag) { void System::RegisterBuffers() { if (state == State::Started) { - std::vector registered_buffers{}; + boost::container::static_vector registered_buffers{}; buffers.RegisterBuffers(registered_buffers); session->AppendBuffers(registered_buffers); } diff --git a/src/audio_core/renderer/command/data_source/decode.cpp b/src/audio_core/renderer/command/data_source/decode.cpp index ff5d31bd6..f45933203 100644 --- a/src/audio_core/renderer/command/data_source/decode.cpp +++ b/src/audio_core/renderer/command/data_source/decode.cpp @@ -8,6 +8,7 @@ #include "audio_core/renderer/command/resample/resample.h" #include "common/fixed_point.h" #include "common/logging/log.h" +#include "common/scratch_buffer.h" #include "core/memory.h" namespace AudioCore::AudioRenderer { @@ -27,6 +28,7 @@ constexpr std::array PitchBySrcQuality = {4, 8, 4}; template static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, const DecodeArg& req) { + std::array tmp_samples{}; constexpr s32 min{std::numeric_limits::min()}; constexpr s32 max{std::numeric_limits::max()}; @@ -49,18 +51,17 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, const u64 size{channel_count * samples_to_decode}; const u64 size_bytes{size * sizeof(T)}; - std::vector samples(size); - memory.ReadBlockUnsafe(source, samples.data(), size_bytes); + memory.ReadBlockUnsafe(source, tmp_samples.data(), size_bytes); if constexpr (std::is_floating_point_v) { for (u32 i = 0; i < samples_to_decode; i++) { - auto sample{static_cast(samples[i * channel_count + req.target_channel] * + auto sample{static_cast(tmp_samples[i * channel_count + 
req.target_channel] * std::numeric_limits::max())}; out_buffer[i] = static_cast(std::clamp(sample, min, max)); } } else { for (u32 i = 0; i < samples_to_decode; i++) { - out_buffer[i] = samples[i * channel_count + req.target_channel]; + out_buffer[i] = tmp_samples[i * channel_count + req.target_channel]; } } } break; @@ -73,17 +74,16 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, } const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))}; - std::vector samples(samples_to_decode); - memory.ReadBlockUnsafe(source, samples.data(), samples_to_decode * sizeof(T)); + memory.ReadBlockUnsafe(source, tmp_samples.data(), samples_to_decode * sizeof(T)); if constexpr (std::is_floating_point_v) { for (u32 i = 0; i < samples_to_decode; i++) { - auto sample{static_cast(samples[i * channel_count + req.target_channel] * + auto sample{static_cast(tmp_samples[i * channel_count + req.target_channel] * std::numeric_limits::max())}; out_buffer[i] = static_cast(std::clamp(sample, min, max)); } } else { - std::memcpy(out_buffer.data(), samples.data(), samples_to_decode * sizeof(s16)); + std::memcpy(out_buffer.data(), tmp_samples.data(), samples_to_decode * sizeof(s16)); } break; } @@ -101,6 +101,7 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, */ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span out_buffer, const DecodeArg& req) { + std::array wavebuffer{}; constexpr u32 SamplesPerFrame{14}; constexpr u32 NibblesPerFrame{16}; @@ -138,9 +139,7 @@ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span out_buffer, } const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)}; - std::vector wavebuffer(size); - memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), - wavebuffer.size()); + memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), size); auto context{req.adpcm_context}; auto header{context->header}; @@ -258,7 +257,7 @@ 
void DecodeFromWaveBuffers(Core::Memory::Memory& memory, const DecodeFromWaveBuf u32 offset{voice_state.offset}; auto output_buffer{args.output}; - std::vector temp_buffer(TempBufferSize, 0); + std::array temp_buffer{}; while (remaining_sample_count > 0) { const auto samples_to_write{std::min(remaining_sample_count, max_remaining_sample_count)}; diff --git a/src/audio_core/renderer/command/effect/compressor.cpp b/src/audio_core/renderer/command/effect/compressor.cpp index 7229618e8..ee9b68d5b 100644 --- a/src/audio_core/renderer/command/effect/compressor.cpp +++ b/src/audio_core/renderer/command/effect/compressor.cpp @@ -44,8 +44,8 @@ static void InitializeCompressorEffect(const CompressorInfo::ParameterVersion2& static void ApplyCompressorEffect(const CompressorInfo::ParameterVersion2& params, CompressorInfo::State& state, bool enabled, - std::vector> input_buffers, - std::vector> output_buffers, u32 sample_count) { + std::span> input_buffers, + std::span> output_buffers, u32 sample_count) { if (enabled) { auto state_00{state.unk_00}; auto state_04{state.unk_04}; @@ -124,8 +124,8 @@ void CompressorCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& } void CompressorCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (s16 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/delay.cpp b/src/audio_core/renderer/command/effect/delay.cpp index a4e408d40..e536cbb1e 100644 --- a/src/audio_core/renderer/command/effect/delay.cpp +++ b/src/audio_core/renderer/command/effect/delay.cpp @@ -51,7 +51,7 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params, state.delay_lines[channel].sample_count_max = 
sample_count_max.to_int_floor(); state.delay_lines[channel].sample_count = sample_count.to_int_floor(); state.delay_lines[channel].buffer.resize(state.delay_lines[channel].sample_count, 0); - if (state.delay_lines[channel].buffer.size() == 0) { + if (state.delay_lines[channel].sample_count == 0) { state.delay_lines[channel].buffer.push_back(0); } state.delay_lines[channel].buffer_pos = 0; @@ -74,8 +74,8 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params, */ template static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state, - std::vector>& inputs, - std::vector>& outputs, const u32 sample_count) { + std::span> inputs, std::span> outputs, + const u32 sample_count) { for (u32 sample_index = 0; sample_index < sample_count; sample_index++) { std::array, NumChannels> input_samples{}; for (u32 channel = 0; channel < NumChannels; channel++) { @@ -153,8 +153,8 @@ static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::St * @param sample_count - Number of samples to process. 
*/ static void ApplyDelayEffect(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state, - const bool enabled, std::vector>& inputs, - std::vector>& outputs, const u32 sample_count) { + const bool enabled, std::span> inputs, + std::span> outputs, const u32 sample_count) { if (!IsChannelCountValid(params.channel_count)) { LOG_ERROR(Service_Audio, "Invalid delay channels {}", params.channel_count); @@ -208,8 +208,8 @@ void DelayCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proce } void DelayCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (s16 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp index 27d8b9844..d2bfb67cc 100644 --- a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp +++ b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp @@ -408,8 +408,8 @@ void I3dl2ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& } void I3dl2ReverbCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/light_limiter.cpp b/src/audio_core/renderer/command/effect/light_limiter.cpp index e8fb0e2fc..4161a9821 100644 --- a/src/audio_core/renderer/command/effect/light_limiter.cpp +++ 
b/src/audio_core/renderer/command/effect/light_limiter.cpp @@ -47,8 +47,8 @@ static void InitializeLightLimiterEffect(const LightLimiterInfo::ParameterVersio */ static void ApplyLightLimiterEffect(const LightLimiterInfo::ParameterVersion2& params, LightLimiterInfo::State& state, const bool enabled, - std::vector>& inputs, - std::vector>& outputs, const u32 sample_count, + std::span> inputs, + std::span> outputs, const u32 sample_count, LightLimiterInfo::StatisticsInternal* statistics) { constexpr s64 min{std::numeric_limits::min()}; constexpr s64 max{std::numeric_limits::max()}; @@ -147,8 +147,8 @@ void LightLimiterVersion1Command::Dump([[maybe_unused]] const ADSP::CommandListP } void LightLimiterVersion1Command::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, @@ -190,8 +190,8 @@ void LightLimiterVersion2Command::Dump([[maybe_unused]] const ADSP::CommandListP } void LightLimiterVersion2Command::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/reverb.cpp b/src/audio_core/renderer/command/effect/reverb.cpp index 8b9b65214..fc2f15a5e 100644 --- a/src/audio_core/renderer/command/effect/reverb.cpp +++ b/src/audio_core/renderer/command/effect/reverb.cpp @@ -250,8 +250,8 @@ static Common::FixedPoint<50, 14> 
Axfx2AllPassTick(ReverbInfo::ReverbDelayLine& */ template static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state, - std::vector>& inputs, - std::vector>& outputs, const u32 sample_count) { + std::span> inputs, + std::span> outputs, const u32 sample_count) { static constexpr std::array OutTapIndexes1Ch{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; @@ -369,8 +369,8 @@ static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, Rever * @param sample_count - Number of samples to process. */ static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state, - const bool enabled, std::vector>& inputs, - std::vector>& outputs, const u32 sample_count) { + const bool enabled, std::span> inputs, + std::span> outputs, const u32 sample_count) { if (enabled) { switch (params.channel_count) { case 0: @@ -412,8 +412,8 @@ void ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proc } void ReverbCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector> input_buffers(parameter.channel_count); - std::vector> output_buffers(parameter.channel_count); + std::array, MaxChannels> input_buffers{}; + std::array, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/sink/circular_buffer.cpp b/src/audio_core/renderer/command/sink/circular_buffer.cpp index ded5afc94..e2ce59792 100644 --- a/src/audio_core/renderer/command/sink/circular_buffer.cpp +++ b/src/audio_core/renderer/command/sink/circular_buffer.cpp @@ -24,7 +24,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces constexpr s32 min{std::numeric_limits::min()}; constexpr s32 max{std::numeric_limits::max()}; - std::vector output(processor.sample_count); + std::array output{}; for (u32 channel = 0; channel < input_count; 
channel++) { auto input{processor.mix_buffers.subspan(inputs[channel] * processor.sample_count, processor.sample_count)}; @@ -33,7 +33,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces } processor.memory->WriteBlockUnsafe(address + pos, output.data(), - output.size() * sizeof(s16)); + processor.sample_count * sizeof(s16)); pos += static_cast(processor.sample_count * sizeof(s16)); if (pos >= size) { pos = 0; diff --git a/src/audio_core/renderer/command/sink/device.cpp b/src/audio_core/renderer/command/sink/device.cpp index e88372a75..5f74dd7ad 100644 --- a/src/audio_core/renderer/command/sink/device.cpp +++ b/src/audio_core/renderer/command/sink/device.cpp @@ -33,8 +33,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) { .consumed{false}, }; - std::vector samples(out_buffer.frames * input_count); - + std::array samples{}; for (u32 channel = 0; channel < input_count; channel++) { const auto offset{inputs[channel] * out_buffer.frames}; @@ -45,7 +44,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) { } out_buffer.tag = reinterpret_cast(samples.data()); - stream->AppendBuffer(out_buffer, samples); + stream->AppendBuffer(out_buffer, {samples.data(), out_buffer.frames * input_count}); if (stream->IsPaused()) { stream->Start(); diff --git a/src/audio_core/renderer/mix/mix_context.cpp b/src/audio_core/renderer/mix/mix_context.cpp index 35b748ede..3a18ae7c2 100644 --- a/src/audio_core/renderer/mix/mix_context.cpp +++ b/src/audio_core/renderer/mix/mix_context.cpp @@ -125,10 +125,10 @@ bool MixContext::TSortInfo(const SplitterContext& splitter_context) { return false; } - std::vector sorted_results{node_states.GetSortedResuls()}; - const auto result_size{std::min(count, static_cast(sorted_results.size()))}; + auto sorted_results{node_states.GetSortedResuls()}; + const auto result_size{std::min(count, static_cast(sorted_results.second))}; for (s32 i = 0; i < result_size; i++) { - 
sorted_mix_infos[i] = &mix_infos[sorted_results[i]]; + sorted_mix_infos[i] = &mix_infos[sorted_results.first[i]]; } CalcMixBufferOffset(); diff --git a/src/audio_core/renderer/nodes/node_states.cpp b/src/audio_core/renderer/nodes/node_states.cpp index 1821a51e6..b7a44a54c 100644 --- a/src/audio_core/renderer/nodes/node_states.cpp +++ b/src/audio_core/renderer/nodes/node_states.cpp @@ -134,8 +134,8 @@ u32 NodeStates::GetNodeCount() const { return node_count; } -std::vector NodeStates::GetSortedResuls() const { - return {results.rbegin(), results.rbegin() + result_pos}; +std::pair::reverse_iterator, size_t> NodeStates::GetSortedResuls() const { + return {results.rbegin(), result_pos}; } } // namespace AudioCore::AudioRenderer diff --git a/src/audio_core/renderer/nodes/node_states.h b/src/audio_core/renderer/nodes/node_states.h index 94b1d1254..e768cd4b5 100644 --- a/src/audio_core/renderer/nodes/node_states.h +++ b/src/audio_core/renderer/nodes/node_states.h @@ -175,7 +175,7 @@ public: * * @return Vector of nodes in reverse order. 
*/ - std::vector GetSortedResuls() const; + std::pair::reverse_iterator, size_t> GetSortedResuls() const; private: /// Number of nodes in the graph diff --git a/src/audio_core/renderer/system.cpp b/src/audio_core/renderer/system.cpp index 53b258c4f..a23627472 100644 --- a/src/audio_core/renderer/system.cpp +++ b/src/audio_core/renderer/system.cpp @@ -444,6 +444,7 @@ Result System::Update(std::span input, std::span performance, std: std::scoped_lock l{lock}; const auto start_time{core.CoreTiming().GetClockTicks()}; + std::memset(output.data(), 0, output.size()); InfoUpdater info_updater(input, output, process_handle, behavior); diff --git a/src/audio_core/sink/null_sink.h b/src/audio_core/sink/null_sink.h index 1215d3cd2..b6b43c93e 100644 --- a/src/audio_core/sink/null_sink.h +++ b/src/audio_core/sink/null_sink.h @@ -20,7 +20,7 @@ public: explicit NullSinkStreamImpl(Core::System& system_, StreamType type_) : SinkStream{system_, type_} {} ~NullSinkStreamImpl() override {} - void AppendBuffer(SinkBuffer&, std::vector&) override {} + void AppendBuffer(SinkBuffer&, std::span) override {} std::vector ReleaseBuffer(u64) override { return {}; } diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp index 9a718a9cc..404dcd0e9 100644 --- a/src/audio_core/sink/sink_stream.cpp +++ b/src/audio_core/sink/sink_stream.cpp @@ -18,7 +18,7 @@ namespace AudioCore::Sink { -void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector& samples) { +void SinkStream::AppendBuffer(SinkBuffer& buffer, std::span samples) { if (type == StreamType::In) { queue.enqueue(buffer); queued_buffers++; @@ -66,15 +66,16 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector& samples) { static_cast(std::clamp(right_sample, min, max)); } - samples.resize(samples.size() / system_channels * device_channels); + samples = samples.subspan(0, samples.size() / system_channels * device_channels); } else if (system_channels == 2 && device_channels == 6) { // We need moar 
samples! Not all games will provide 6 channel audio. // TODO: Implement some upmixing here. Currently just passthrough, with other // channels left as silence. - std::vector new_samples(samples.size() / system_channels * device_channels, 0); + auto new_size = samples.size() / system_channels * device_channels; + tmp_samples.resize_destructive(new_size); - for (u32 read_index = 0, write_index = 0; read_index < samples.size(); + for (u32 read_index = 0, write_index = 0; read_index < new_size; read_index += system_channels, write_index += device_channels) { const auto left_sample{static_cast(std::clamp( static_cast( @@ -82,7 +83,7 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector& samples) { volume), min, max))}; - new_samples[write_index + static_cast(Channels::FrontLeft)] = left_sample; + tmp_samples[write_index + static_cast(Channels::FrontLeft)] = left_sample; const auto right_sample{static_cast(std::clamp( static_cast( @@ -90,9 +91,9 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector& samples) { volume), min, max))}; - new_samples[write_index + static_cast(Channels::FrontRight)] = right_sample; + tmp_samples[write_index + static_cast(Channels::FrontRight)] = right_sample; } - samples = std::move(new_samples); + samples = std::span(tmp_samples); } else if (volume != 1.0f) { for (u32 i = 0; i < samples.size(); i++) { diff --git a/src/audio_core/sink/sink_stream.h b/src/audio_core/sink/sink_stream.h index 41cbadc9c..98d72ace1 100644 --- a/src/audio_core/sink/sink_stream.h +++ b/src/audio_core/sink/sink_stream.h @@ -16,6 +16,7 @@ #include "common/polyfill_thread.h" #include "common/reader_writer_queue.h" #include "common/ring_buffer.h" +#include "common/scratch_buffer.h" #include "common/thread.h" namespace Core { @@ -170,7 +171,7 @@ public: * @param buffer - Audio buffer information to be queued. * @param samples - The s16 samples to be queue for playback. 
*/ - virtual void AppendBuffer(SinkBuffer& buffer, std::vector& samples); + virtual void AppendBuffer(SinkBuffer& buffer, std::span samples); /** * Release a buffer. Audio In only, will fill a buffer with recorded samples. @@ -255,6 +256,8 @@ private: /// Signalled when ring buffer entries are consumed std::condition_variable_any release_cv; std::mutex release_mutex; + /// Temporary buffer for appending samples when upmixing + Common::ScratchBuffer tmp_samples{}; }; using SinkStreamPtr = std::unique_ptr; diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h index 4c328ab44..416680d44 100644 --- a/src/common/ring_buffer.h +++ b/src/common/ring_buffer.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -53,7 +54,7 @@ public: return push_count; } - std::size_t Push(const std::vector& input) { + std::size_t Push(const std::span input) { return Push(input.data(), input.size()); } diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h index a69a5a7af..6fe907953 100644 --- a/src/common/scratch_buffer.h +++ b/src/common/scratch_buffer.h @@ -3,6 +3,9 @@ #pragma once +#include + +#include "common/concepts.h" #include "common/make_unique_for_overwrite.h" namespace Common { @@ -16,6 +19,12 @@ namespace Common { template class ScratchBuffer { public: + using iterator = T*; + using const_iterator = const T*; + using value_type = T; + using element_type = T; + using iterator_category = std::contiguous_iterator_tag; + ScratchBuffer() = default; explicit ScratchBuffer(size_t initial_capacity) diff --git a/src/core/hle/kernel/k_synchronization_object.cpp b/src/core/hle/kernel/k_synchronization_object.cpp index b7da3eee7..3e5b735b1 100644 --- a/src/core/hle/kernel/k_synchronization_object.cpp +++ b/src/core/hle/kernel/k_synchronization_object.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/scratch_buffer.h" #include "core/hle/kernel/k_scheduler.h" #include 
"core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" #include "core/hle/kernel/k_synchronization_object.h" @@ -75,7 +76,7 @@ Result KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index, KSynchronizationObject** objects, const s32 num_objects, s64 timeout) { // Allocate space on stack for thread nodes. - std::vector thread_nodes(num_objects); + std::array thread_nodes; // Prepare for wait. KThread* thread = GetCurrentThreadPointer(kernel); diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index 908811e2c..adb6ec581 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -909,7 +909,7 @@ Result KThread::SetActivity(Svc::ThreadActivity activity) { R_SUCCEED(); } -Result KThread::GetThreadContext3(std::vector& out) { +Result KThread::GetThreadContext3(Common::ScratchBuffer& out) { // Lock ourselves. KScopedLightLock lk{m_activity_pause_lock}; @@ -927,15 +927,13 @@ Result KThread::GetThreadContext3(std::vector& out) { // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. auto context = GetContext64(); context.pstate &= 0xFF0FFE20; - - out.resize(sizeof(context)); + out.resize_destructive(sizeof(context)); std::memcpy(out.data(), std::addressof(context), sizeof(context)); } else { // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. 
auto context = GetContext32(); context.cpsr &= 0xFF0FFE20; - - out.resize(sizeof(context)); + out.resize_destructive(sizeof(context)); std::memcpy(out.data(), std::addressof(context), sizeof(context)); } } diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h index 37fe5db77..dd662b3f8 100644 --- a/src/core/hle/kernel/k_thread.h +++ b/src/core/hle/kernel/k_thread.h @@ -15,6 +15,7 @@ #include "common/intrusive_list.h" #include "common/intrusive_red_black_tree.h" +#include "common/scratch_buffer.h" #include "common/spin_lock.h" #include "core/arm/arm_interface.h" #include "core/hle/kernel/k_affinity_mask.h" @@ -567,7 +568,7 @@ public: void RemoveWaiter(KThread* thread); - Result GetThreadContext3(std::vector& out); + Result GetThreadContext3(Common::ScratchBuffer& out); KThread* RemoveUserWaiterByKey(bool* out_has_waiters, KProcessAddress key) { return this->RemoveWaiterByKey(out_has_waiters, key, false); diff --git a/src/core/hle/kernel/svc/svc_ipc.cpp b/src/core/hle/kernel/svc/svc_ipc.cpp index ea03068aa..60247df2e 100644 --- a/src/core/hle/kernel/svc/svc_ipc.cpp +++ b/src/core/hle/kernel/svc/svc_ipc.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/scope_exit.h" +#include "common/scratch_buffer.h" #include "core/core.h" #include "core/hle/kernel/k_client_session.h" #include "core/hle/kernel/k_process.h" @@ -45,11 +46,11 @@ Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_ad handles_addr, static_cast(sizeof(Handle) * num_handles)), ResultInvalidPointer); - std::vector handles(num_handles); + std::array handles; GetCurrentMemory(kernel).ReadBlock(handles_addr, handles.data(), sizeof(Handle) * num_handles); // Convert handle list to object table. 
- std::vector objs(num_handles); + std::array objs; R_UNLESS(handle_table.GetMultipleObjects(objs.data(), handles.data(), num_handles), ResultInvalidHandle); @@ -80,7 +81,7 @@ Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_ad // Wait for an object. s32 index; Result result = KSynchronizationObject::Wait(kernel, std::addressof(index), objs.data(), - static_cast(objs.size()), timeout_ns); + num_handles, timeout_ns); if (result == ResultTimedOut) { R_RETURN(result); } diff --git a/src/core/hle/kernel/svc/svc_synchronization.cpp b/src/core/hle/kernel/svc/svc_synchronization.cpp index 04d65f0bd..53df5bcd8 100644 --- a/src/core/hle/kernel/svc/svc_synchronization.cpp +++ b/src/core/hle/kernel/svc/svc_synchronization.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/scope_exit.h" +#include "common/scratch_buffer.h" #include "core/core.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_readable_event.h" @@ -54,7 +55,7 @@ static Result WaitSynchronization(Core::System& system, int32_t* out_index, cons // Get the synchronization context. auto& kernel = system.Kernel(); auto& handle_table = GetCurrentProcess(kernel).GetHandleTable(); - std::vector objs(num_handles); + std::array objs; // Copy user handles. if (num_handles > 0) { @@ -72,8 +73,8 @@ static Result WaitSynchronization(Core::System& system, int32_t* out_index, cons }); // Wait on the objects. - Result res = KSynchronizationObject::Wait(kernel, out_index, objs.data(), - static_cast(objs.size()), timeout_ns); + Result res = + KSynchronizationObject::Wait(kernel, out_index, objs.data(), num_handles, timeout_ns); R_SUCCEED_IF(res == ResultSessionClosed); R_RETURN(res); @@ -87,8 +88,7 @@ Result WaitSynchronization(Core::System& system, int32_t* out_index, u64 user_ha // Ensure number of handles is valid. 
R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange); - - std::vector handles(num_handles); + std::array handles; if (num_handles > 0) { GetCurrentMemory(system.Kernel()) .ReadBlock(user_handles, handles.data(), num_handles * sizeof(Handle)); diff --git a/src/core/hle/kernel/svc/svc_thread.cpp b/src/core/hle/kernel/svc/svc_thread.cpp index 37b54079c..36b94e6bf 100644 --- a/src/core/hle/kernel/svc/svc_thread.cpp +++ b/src/core/hle/kernel/svc/svc_thread.cpp @@ -174,7 +174,7 @@ Result GetThreadContext3(Core::System& system, u64 out_context, Handle thread_ha } // Get the thread context. - std::vector context; + static thread_local Common::ScratchBuffer context; R_TRY(thread->GetThreadContext3(context)); // Copy the thread context to user space. diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp index f0640c64f..c8d574993 100644 --- a/src/core/hle/service/audio/audin_u.cpp +++ b/src/core/hle/service/audio/audin_u.cpp @@ -5,6 +5,7 @@ #include "audio_core/renderer/audio_device.h" #include "common/common_funcs.h" #include "common/logging/log.h" +#include "common/settings.h" #include "common/string_util.h" #include "core/core.h" #include "core/hle/kernel/k_event.h" @@ -123,19 +124,13 @@ private: void GetReleasedAudioInBuffer(HLERequestContext& ctx) { const auto write_buffer_size = ctx.GetWriteBufferNumElements(); - std::vector released_buffers(write_buffer_size); + tmp_buffer.resize_destructive(write_buffer_size); + tmp_buffer[0] = 0; - const auto count = impl->GetReleasedBuffers(released_buffers); + const auto count = impl->GetReleasedBuffers(tmp_buffer); - [[maybe_unused]] std::string tags{}; - for (u32 i = 0; i < count; i++) { - tags += fmt::format("{:08X}, ", released_buffers[i]); - } - [[maybe_unused]] auto sessionid{impl->GetSystem().GetSessionId()}; - LOG_TRACE(Service_Audio, "called. 
Session {} released {} buffers: {}", sessionid, count, - tags); + ctx.WriteBuffer(tmp_buffer); - ctx.WriteBuffer(released_buffers); IPC::ResponseBuilder rb{ctx, 3}; rb.Push(ResultSuccess); rb.Push(count); @@ -200,6 +195,7 @@ private: KernelHelpers::ServiceContext service_context; Kernel::KEvent* event; std::shared_ptr impl; + Common::ScratchBuffer tmp_buffer; }; AudInU::AudInU(Core::System& system_) diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 3e62fa4fc..032c8c11f 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp @@ -123,19 +123,13 @@ private: void GetReleasedAudioOutBuffers(HLERequestContext& ctx) { const auto write_buffer_size = ctx.GetWriteBufferNumElements(); - std::vector released_buffers(write_buffer_size); + tmp_buffer.resize_destructive(write_buffer_size); + tmp_buffer[0] = 0; - const auto count = impl->GetReleasedBuffers(released_buffers); + const auto count = impl->GetReleasedBuffers(tmp_buffer); - [[maybe_unused]] std::string tags{}; - for (u32 i = 0; i < count; i++) { - tags += fmt::format("{:08X}, ", released_buffers[i]); - } - [[maybe_unused]] const auto sessionid{impl->GetSystem().GetSessionId()}; - LOG_TRACE(Service_Audio, "called. 
Session {} released {} buffers: {}", sessionid, count, - tags); + ctx.WriteBuffer(tmp_buffer); - ctx.WriteBuffer(released_buffers); IPC::ResponseBuilder rb{ctx, 3}; rb.Push(ResultSuccess); rb.Push(count); @@ -211,6 +205,7 @@ private: KernelHelpers::ServiceContext service_context; Kernel::KEvent* event; std::shared_ptr impl; + Common::ScratchBuffer tmp_buffer; }; AudOutU::AudOutU(Core::System& system_) diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 7086d4750..12845c23a 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -116,28 +116,26 @@ private: // These buffers are written manually to avoid an issue with WriteBuffer throwing errors for // checking size 0. Performance size is 0 for most games. - std::vector output{}; - std::vector performance{}; auto is_buffer_b{ctx.BufferDescriptorB()[0].Size() != 0}; if (is_buffer_b) { const auto buffersB{ctx.BufferDescriptorB()}; - output.resize(buffersB[0].Size(), 0); - performance.resize(buffersB[1].Size(), 0); + tmp_output.resize_destructive(buffersB[0].Size()); + tmp_performance.resize_destructive(buffersB[1].Size()); } else { const auto buffersC{ctx.BufferDescriptorC()}; - output.resize(buffersC[0].Size(), 0); - performance.resize(buffersC[1].Size(), 0); + tmp_output.resize_destructive(buffersC[0].Size()); + tmp_performance.resize_destructive(buffersC[1].Size()); } - auto result = impl->RequestUpdate(input, performance, output); + auto result = impl->RequestUpdate(input, tmp_performance, tmp_output); if (result.IsSuccess()) { if (is_buffer_b) { - ctx.WriteBufferB(output.data(), output.size(), 0); - ctx.WriteBufferB(performance.data(), performance.size(), 1); + ctx.WriteBufferB(tmp_output.data(), tmp_output.size(), 0); + ctx.WriteBufferB(tmp_performance.data(), tmp_performance.size(), 1); } else { - ctx.WriteBufferC(output.data(), output.size(), 0); - ctx.WriteBufferC(performance.data(), performance.size(), 1); + 
ctx.WriteBufferC(tmp_output.data(), tmp_output.size(), 0); + ctx.WriteBufferC(tmp_performance.data(), tmp_performance.size(), 1); } } else { LOG_ERROR(Service_Audio, "RequestUpdate failed error 0x{:02X}!", result.description); @@ -235,6 +233,8 @@ private: Kernel::KEvent* rendered_event; Manager& manager; std::unique_ptr impl; + Common::ScratchBuffer tmp_output; + Common::ScratchBuffer tmp_performance; }; class IAudioDevice final : public ServiceFramework { diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h index 24ce37e87..d8e9c8719 100644 --- a/src/core/hle/service/audio/audren_u.h +++ b/src/core/hle/service/audio/audren_u.h @@ -4,6 +4,7 @@ #pragma once #include "audio_core/audio_render_manager.h" +#include "common/scratch_buffer.h" #include "core/hle/service/kernel_helpers.h" #include "core/hle/service/service.h" diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index 451ac224a..c835f6cb7 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp @@ -68,13 +68,13 @@ private: ExtraBehavior extra_behavior) { u32 consumed = 0; u32 sample_count = 0; - std::vector samples(ctx.GetWriteBufferNumElements()); + tmp_samples.resize_destructive(ctx.GetWriteBufferNumElements()); if (extra_behavior == ExtraBehavior::ResetContext) { ResetDecoderContext(); } - if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) { + if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), tmp_samples, performance)) { LOG_ERROR(Audio, "Failed to decode opus data"); IPC::ResponseBuilder rb{ctx, 2}; // TODO(ogniK): Use correct error code @@ -90,11 +90,11 @@ private: if (performance) { rb.Push(*performance); } - ctx.WriteBuffer(samples); + ctx.WriteBuffer(tmp_samples); } bool DecodeOpusData(u32& consumed, u32& sample_count, std::span input, - std::vector& output, u64* out_performance_time) const { + std::span output, u64* out_performance_time) 
const { const auto start_time = std::chrono::steady_clock::now(); const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); if (sizeof(OpusPacketHeader) > input.size()) { @@ -154,6 +154,7 @@ private: OpusDecoderPtr decoder; u32 sample_rate; u32 channel_count; + Common::ScratchBuffer tmp_samples; }; class IHardwareOpusDecoderManager final : public ServiceFramework { diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h index ab1f30f9e..a04538d5d 100644 --- a/src/core/hle/service/nvdrv/devices/nvdevice.h +++ b/src/core/hle/service/nvdrv/devices/nvdevice.h @@ -34,7 +34,7 @@ public: * @returns The result code of the ioctl. */ virtual NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) = 0; + std::span output) = 0; /** * Handles an ioctl2 request. @@ -45,7 +45,7 @@ public: * @returns The result code of the ioctl. */ virtual NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) = 0; + std::span inline_input, std::span output) = 0; /** * Handles an ioctl3 request. @@ -56,7 +56,7 @@ public: * @returns The result code of the ioctl. 
*/ virtual NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, - std::vector& output, std::vector& inline_output) = 0; + std::span output, std::span inline_output) = 0; /** * Called once a device is opened diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 0fe242e9d..05a43d8dc 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -18,19 +18,19 @@ nvdisp_disp0::nvdisp_disp0(Core::System& system_, NvCore::Container& core) nvdisp_disp0::~nvdisp_disp0() = default; NvResult nvdisp_disp0::Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) { + std::span output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvdisp_disp0::Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) { + std::span inline_input, std::span output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span input, - std::vector& output, std::vector& inline_output) { + std::span output, std::span inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index bcd0e3ed5..daee05fe8 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h @@ -26,11 +26,11 @@ public: ~nvdisp_disp0() override; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) override; + std::span output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, 
std::vector& output, - std::vector& inline_output) override; + std::span inline_input, std::span output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::span output, + std::span inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 681bd0867..07e570a9f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -28,7 +28,7 @@ nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Con nvhost_as_gpu::~nvhost_as_gpu() = default; NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) { + std::span output) { switch (command.group) { case 'A': switch (command.cmd) { @@ -61,13 +61,13 @@ NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span i } NvResult nvhost_as_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) { + std::span inline_input, std::span output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span input, - std::vector& output, std::vector& inline_output) { + std::span output, std::span inline_output) { switch (command.group) { case 'A': switch (command.cmd) { @@ -87,7 +87,7 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span i void nvhost_as_gpu::OnOpen(DeviceFD fd) {} void nvhost_as_gpu::OnClose(DeviceFD fd) {} -NvResult nvhost_as_gpu::AllocAsEx(std::span input, std::vector& output) { +NvResult nvhost_as_gpu::AllocAsEx(std::span input, std::span output) { IoctlAllocAsEx params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -141,7 +141,7 @@ NvResult nvhost_as_gpu::AllocAsEx(std::span input, std::vector& ou return 
NvResult::Success; } -NvResult nvhost_as_gpu::AllocateSpace(std::span input, std::vector& output) { +NvResult nvhost_as_gpu::AllocateSpace(std::span input, std::span output) { IoctlAllocSpace params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -220,7 +220,7 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) { mapping_map.erase(offset); } -NvResult nvhost_as_gpu::FreeSpace(std::span input, std::vector& output) { +NvResult nvhost_as_gpu::FreeSpace(std::span input, std::span output) { IoctlFreeSpace params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -266,15 +266,14 @@ NvResult nvhost_as_gpu::FreeSpace(std::span input, std::vector& ou return NvResult::Success; } -NvResult nvhost_as_gpu::Remap(std::span input, std::vector& output) { +NvResult nvhost_as_gpu::Remap(std::span input, std::span output) { const auto num_entries = input.size() / sizeof(IoctlRemapEntry); LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries); - std::vector entries(num_entries); - std::memcpy(entries.data(), input.data(), input.size()); - std::scoped_lock lock(mutex); + entries.resize_destructive(num_entries); + std::memcpy(entries.data(), input.data(), input.size()); if (!vm.initialised) { return NvResult::BadValue; @@ -320,7 +319,7 @@ NvResult nvhost_as_gpu::Remap(std::span input, std::vector& output return NvResult::Success; } -NvResult nvhost_as_gpu::MapBufferEx(std::span input, std::vector& output) { +NvResult nvhost_as_gpu::MapBufferEx(std::span input, std::span output) { IoctlMapBufferEx params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -424,7 +423,7 @@ NvResult nvhost_as_gpu::MapBufferEx(std::span input, std::vector& return NvResult::Success; } -NvResult nvhost_as_gpu::UnmapBuffer(std::span input, std::vector& output) { +NvResult nvhost_as_gpu::UnmapBuffer(std::span input, std::span output) { IoctlUnmapBuffer params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -463,7 +462,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(std::span input, std::vector& 
return NvResult::Success; } -NvResult nvhost_as_gpu::BindChannel(std::span input, std::vector& output) { +NvResult nvhost_as_gpu::BindChannel(std::span input, std::span output) { IoctlBindChannel params{}; std::memcpy(¶ms, input.data(), input.size()); LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd); @@ -492,7 +491,7 @@ void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) { }; } -NvResult nvhost_as_gpu::GetVARegions(std::span input, std::vector& output) { +NvResult nvhost_as_gpu::GetVARegions(std::span input, std::span output) { IoctlGetVaRegions params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -511,8 +510,8 @@ NvResult nvhost_as_gpu::GetVARegions(std::span input, std::vector& return NvResult::Success; } -NvResult nvhost_as_gpu::GetVARegions(std::span input, std::vector& output, - std::vector& inline_output) { +NvResult nvhost_as_gpu::GetVARegions(std::span input, std::span output, + std::span inline_output) { IoctlGetVaRegions params{}; std::memcpy(¶ms, input.data(), input.size()); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index 1aba8d579..2af3e1260 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h @@ -15,6 +15,7 @@ #include "common/address_space.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/scratch_buffer.h" #include "common/swap.h" #include "core/hle/service/nvdrv/core/nvmap.h" #include "core/hle/service/nvdrv/devices/nvdevice.h" @@ -48,11 +49,11 @@ public: ~nvhost_as_gpu() override; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) override; + std::span output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::vector& output, - std::vector& inline_output) override; + 
std::span inline_input, std::span output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::span output, + std::span inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; @@ -138,18 +139,18 @@ private: static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2, "IoctlGetVaRegions is incorrect size"); - NvResult AllocAsEx(std::span input, std::vector& output); - NvResult AllocateSpace(std::span input, std::vector& output); - NvResult Remap(std::span input, std::vector& output); - NvResult MapBufferEx(std::span input, std::vector& output); - NvResult UnmapBuffer(std::span input, std::vector& output); - NvResult FreeSpace(std::span input, std::vector& output); - NvResult BindChannel(std::span input, std::vector& output); + NvResult AllocAsEx(std::span input, std::span output); + NvResult AllocateSpace(std::span input, std::span output); + NvResult Remap(std::span input, std::span output); + NvResult MapBufferEx(std::span input, std::span output); + NvResult UnmapBuffer(std::span input, std::span output); + NvResult FreeSpace(std::span input, std::span output); + NvResult BindChannel(std::span input, std::span output); void GetVARegionsImpl(IoctlGetVaRegions& params); - NvResult GetVARegions(std::span input, std::vector& output); - NvResult GetVARegions(std::span input, std::vector& output, - std::vector& inline_output); + NvResult GetVARegions(std::span input, std::span output); + NvResult GetVARegions(std::span input, std::span output, + std::span inline_output); void FreeMappingLocked(u64 offset); @@ -212,6 +213,7 @@ private: bool initialised{}; } vm; std::shared_ptr gmmu; + Common::ScratchBuffer entries; // s32 channel{}; // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE}; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index e12025560..4d55554b4 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ 
b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -35,7 +35,7 @@ nvhost_ctrl::~nvhost_ctrl() { } NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) { + std::span output) { switch (command.group) { case 0x0: switch (command.cmd) { @@ -64,13 +64,13 @@ NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span inp } NvResult nvhost_ctrl::Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) { + std::span inline_input, std::span output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, std::span input, - std::vector& output, std::vector& inline_outpu) { + std::span output, std::span inline_outpu) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } @@ -79,7 +79,7 @@ void nvhost_ctrl::OnOpen(DeviceFD fd) {} void nvhost_ctrl::OnClose(DeviceFD fd) {} -NvResult nvhost_ctrl::NvOsGetConfigU32(std::span input, std::vector& output) { +NvResult nvhost_ctrl::NvOsGetConfigU32(std::span input, std::span output) { IocGetConfigParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); LOG_TRACE(Service_NVDRV, "called, setting={}!{}", params.domain_str.data(), @@ -87,7 +87,7 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(std::span input, std::vector input, std::vector& output, +NvResult nvhost_ctrl::IocCtrlEventWait(std::span input, std::span output, bool is_allocation) { IocCtrlEventWaitParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); @@ -231,7 +231,7 @@ NvResult nvhost_ctrl::FreeEvent(u32 slot) { return NvResult::Success; } -NvResult nvhost_ctrl::IocCtrlEventRegister(std::span input, std::vector& output) { +NvResult nvhost_ctrl::IocCtrlEventRegister(std::span input, std::span output) { IocCtrlEventRegisterParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); const u32 event_id = params.user_event_id; 
@@ -252,7 +252,7 @@ NvResult nvhost_ctrl::IocCtrlEventRegister(std::span input, std::vecto return NvResult::Success; } -NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span input, std::vector& output) { +NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span input, std::span output) { IocCtrlEventUnregisterParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); const u32 event_id = params.user_event_id & 0x00FF; @@ -262,8 +262,7 @@ NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span input, std::vec return FreeEvent(event_id); } -NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span input, - std::vector& output) { +NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span input, std::span output) { IocCtrlEventUnregisterBatchParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); u64 event_mask = params.user_events; @@ -281,7 +280,7 @@ NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span input, return NvResult::Success; } -NvResult nvhost_ctrl::IocCtrlClearEventWait(std::span input, std::vector& output) { +NvResult nvhost_ctrl::IocCtrlClearEventWait(std::span input, std::span output) { IocCtrlEventClearParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index dd2e7888a..2efed4862 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h @@ -26,11 +26,11 @@ public: ~nvhost_ctrl() override; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) override; + std::span output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::vector& output, - std::vector& inline_output) override; + std::span inline_input, std::span output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, 
std::span input, std::span output, + std::span inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; @@ -186,13 +186,12 @@ private: static_assert(sizeof(IocCtrlEventUnregisterBatchParams) == 8, "IocCtrlEventKill is incorrect size"); - NvResult NvOsGetConfigU32(std::span input, std::vector& output); - NvResult IocCtrlEventWait(std::span input, std::vector& output, - bool is_allocation); - NvResult IocCtrlEventRegister(std::span input, std::vector& output); - NvResult IocCtrlEventUnregister(std::span input, std::vector& output); - NvResult IocCtrlEventUnregisterBatch(std::span input, std::vector& output); - NvResult IocCtrlClearEventWait(std::span input, std::vector& output); + NvResult NvOsGetConfigU32(std::span input, std::span output); + NvResult IocCtrlEventWait(std::span input, std::span output, bool is_allocation); + NvResult IocCtrlEventRegister(std::span input, std::span output); + NvResult IocCtrlEventUnregister(std::span input, std::span output); + NvResult IocCtrlEventUnregisterBatch(std::span input, std::span output); + NvResult IocCtrlClearEventWait(std::span input, std::span output); NvResult FreeEvent(u32 slot); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index be3c083db..6081d92e9 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp @@ -22,7 +22,7 @@ nvhost_ctrl_gpu::~nvhost_ctrl_gpu() { } NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) { + std::span output) { switch (command.group) { case 'G': switch (command.cmd) { @@ -54,13 +54,13 @@ NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span } NvResult nvhost_ctrl_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) { + std::span inline_input, std::span output) { 
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span input, - std::vector& output, std::vector& inline_output) { + std::span output, std::span inline_output) { switch (command.group) { case 'G': switch (command.cmd) { @@ -82,7 +82,7 @@ NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span void nvhost_ctrl_gpu::OnOpen(DeviceFD fd) {} void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {} -NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span input, std::vector& output) { +NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span input, std::span output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlCharacteristics params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -127,8 +127,8 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span input, std::vec return NvResult::Success; } -NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span input, std::vector& output, - std::vector& inline_output) { +NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span input, std::span output, + std::span inline_output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlCharacteristics params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -175,7 +175,7 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span input, std::vec return NvResult::Success; } -NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span input, std::vector& output) { +NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span input, std::span output) { IoctlGpuGetTpcMasksArgs params{}; std::memcpy(¶ms, input.data(), input.size()); LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size); @@ -186,8 +186,8 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span input, std::vector return NvResult::Success; } -NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span input, std::vector& output, - std::vector& inline_output) { +NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span input, std::span output, + 
std::span inline_output) { IoctlGpuGetTpcMasksArgs params{}; std::memcpy(¶ms, input.data(), input.size()); LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size); @@ -199,7 +199,7 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span input, std::vector return NvResult::Success; } -NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span input, std::vector& output) { +NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span input, std::span output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlActiveSlotMask params{}; @@ -212,7 +212,7 @@ NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span input, std::vect return NvResult::Success; } -NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span input, std::vector& output) { +NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span input, std::span output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlZcullGetCtxSize params{}; @@ -224,7 +224,7 @@ NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span input, std::vector return NvResult::Success; } -NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span input, std::vector& output) { +NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span input, std::span output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlNvgpuGpuZcullGetInfoArgs params{}; @@ -247,7 +247,7 @@ NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span input, std::vector input, std::vector& output) { +NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span input, std::span output) { LOG_WARNING(Service_NVDRV, "(STUBBED) called"); IoctlZbcSetTable params{}; @@ -263,7 +263,7 @@ NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span input, std::vector return NvResult::Success; } -NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span input, std::vector& output) { +NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span input, std::span output) { LOG_WARNING(Service_NVDRV, "(STUBBED) called"); IoctlZbcQueryTable params{}; @@ -273,7 +273,7 @@ NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span input, std::vector input, std::vector& output) { +NvResult 
nvhost_ctrl_gpu::FlushL2(std::span input, std::span output) { LOG_WARNING(Service_NVDRV, "(STUBBED) called"); IoctlFlushL2 params{}; @@ -283,7 +283,7 @@ NvResult nvhost_ctrl_gpu::FlushL2(std::span input, std::vector& ou return NvResult::Success; } -NvResult nvhost_ctrl_gpu::GetGpuTime(std::span input, std::vector& output) { +NvResult nvhost_ctrl_gpu::GetGpuTime(std::span input, std::span output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlGetGpuTime params{}; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h index b9333d9d3..97995551c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h @@ -22,11 +22,11 @@ public: ~nvhost_ctrl_gpu() override; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) override; + std::span output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::vector& output, - std::vector& inline_output) override; + std::span inline_input, std::span output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::span output, + std::span inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; @@ -151,21 +151,21 @@ private: }; static_assert(sizeof(IoctlGetGpuTime) == 0x10, "IoctlGetGpuTime is incorrect size"); - NvResult GetCharacteristics(std::span input, std::vector& output); - NvResult GetCharacteristics(std::span input, std::vector& output, - std::vector& inline_output); - - NvResult GetTPCMasks(std::span input, std::vector& output); - NvResult GetTPCMasks(std::span input, std::vector& output, - std::vector& inline_output); - - NvResult GetActiveSlotMask(std::span input, std::vector& output); - NvResult ZCullGetCtxSize(std::span input, std::vector& output); - NvResult 
ZCullGetInfo(std::span input, std::vector& output); - NvResult ZBCSetTable(std::span input, std::vector& output); - NvResult ZBCQueryTable(std::span input, std::vector& output); - NvResult FlushL2(std::span input, std::vector& output); - NvResult GetGpuTime(std::span input, std::vector& output); + NvResult GetCharacteristics(std::span input, std::span output); + NvResult GetCharacteristics(std::span input, std::span output, + std::span inline_output); + + NvResult GetTPCMasks(std::span input, std::span output); + NvResult GetTPCMasks(std::span input, std::span output, + std::span inline_output); + + NvResult GetActiveSlotMask(std::span input, std::span output); + NvResult ZCullGetCtxSize(std::span input, std::span output); + NvResult ZCullGetInfo(std::span input, std::span output); + NvResult ZBCSetTable(std::span input, std::span output); + NvResult ZBCQueryTable(std::span input, std::span output); + NvResult FlushL2(std::span input, std::span output); + NvResult GetGpuTime(std::span input, std::span output); EventInterface& events_interface; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 453a965dc..46a25fcab 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -47,7 +47,7 @@ nvhost_gpu::~nvhost_gpu() { } NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) { + std::span output) { switch (command.group) { case 0x0: switch (command.cmd) { @@ -99,7 +99,7 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span inpu }; NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) { + std::span inline_input, std::span output) { switch (command.group) { case 'H': switch (command.cmd) { @@ -113,7 +113,7 @@ NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span inpu } NvResult nvhost_gpu::Ioctl3(DeviceFD fd, 
Ioctl command, std::span input, - std::vector& output, std::vector& inline_output) { + std::span output, std::span inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } @@ -121,7 +121,7 @@ NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span inpu void nvhost_gpu::OnOpen(DeviceFD fd) {} void nvhost_gpu::OnClose(DeviceFD fd) {} -NvResult nvhost_gpu::SetNVMAPfd(std::span input, std::vector& output) { +NvResult nvhost_gpu::SetNVMAPfd(std::span input, std::span output) { IoctlSetNvmapFD params{}; std::memcpy(&params, input.data(), input.size()); LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); @@ -130,7 +130,7 @@ NvResult nvhost_gpu::SetNVMAPfd(std::span input, std::vector& outp return NvResult::Success; } -NvResult nvhost_gpu::SetClientData(std::span input, std::vector& output) { +NvResult nvhost_gpu::SetClientData(std::span input, std::span output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlClientData params{}; @@ -139,7 +139,7 @@ NvResult nvhost_gpu::SetClientData(std::span input, std::vector& o return NvResult::Success; } -NvResult nvhost_gpu::GetClientData(std::span input, std::vector& output) { +NvResult nvhost_gpu::GetClientData(std::span input, std::span output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlClientData params{}; @@ -149,7 +149,7 @@ NvResult nvhost_gpu::GetClientData(std::span input, std::vector& o return NvResult::Success; } -NvResult nvhost_gpu::ZCullBind(std::span input, std::vector& output) { +NvResult nvhost_gpu::ZCullBind(std::span input, std::span output) { std::memcpy(&zcull_params, input.data(), input.size()); LOG_DEBUG(Service_NVDRV, "called, gpu_va={:X}, mode={:X}", zcull_params.gpu_va, zcull_params.mode); @@ -158,7 +158,7 @@ NvResult nvhost_gpu::ZCullBind(std::span input, std::vector& outpu return NvResult::Success; } -NvResult nvhost_gpu::SetErrorNotifier(std::span input, std::vector& output) { +NvResult nvhost_gpu::SetErrorNotifier(std::span input, std::span
output) { IoctlSetErrorNotifier params{}; std::memcpy(&params, input.data(), input.size()); LOG_WARNING(Service_NVDRV, "(STUBBED) called, offset={:X}, size={:X}, mem={:X}", params.offset, @@ -168,14 +168,14 @@ NvResult nvhost_gpu::SetErrorNotifier(std::span input, std::vector return NvResult::Success; } -NvResult nvhost_gpu::SetChannelPriority(std::span input, std::vector& output) { +NvResult nvhost_gpu::SetChannelPriority(std::span input, std::span output) { std::memcpy(&channel_priority, input.data(), input.size()); LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority={:X}", channel_priority); return NvResult::Success; } -NvResult nvhost_gpu::AllocGPFIFOEx2(std::span input, std::vector& output) { +NvResult nvhost_gpu::AllocGPFIFOEx2(std::span input, std::span output) { IoctlAllocGpfifoEx2 params{}; std::memcpy(&params, input.data(), input.size()); LOG_WARNING(Service_NVDRV, @@ -197,7 +197,7 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(std::span input, std::vector& return NvResult::Success; } -NvResult nvhost_gpu::AllocateObjectContext(std::span input, std::vector& output) { +NvResult nvhost_gpu::AllocateObjectContext(std::span input, std::span output) { IoctlAllocObjCtx params{}; std::memcpy(&params, input.data(), input.size()); LOG_WARNING(Service_NVDRV, "(STUBBED) called, class_num={:X}, flags={:X}", params.class_num, @@ -208,7 +208,8 @@ NvResult nvhost_gpu::AllocateObjectContext(std::span input, std::vecto return NvResult::Success; } -static std::vector BuildWaitCommandList(NvFence fence) { +static boost::container::small_vector BuildWaitCommandList( + NvFence fence) { return { Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, Tegra::SubmissionMode::Increasing), @@ -219,35 +220,35 @@ static std::vector BuildWaitCommandList(NvFence fence) { }; } -static std::vector BuildIncrementCommandList(NvFence fence) { - std::vector result{ +static boost::container::small_vector BuildIncrementCommandList( + NvFence fence) { + boost::container::small_vector result{
Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, Tegra::SubmissionMode::Increasing), {}}; for (u32 count = 0; count < 2; ++count) { - result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1, - Tegra::SubmissionMode::Increasing)); - result.emplace_back( + result.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1, + Tegra::SubmissionMode::Increasing)); + result.push_back( BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id)); } return result; } -static std::vector BuildIncrementWithWfiCommandList(NvFence fence) { - std::vector result{ +static boost::container::small_vector BuildIncrementWithWfiCommandList( + NvFence fence) { + boost::container::small_vector result{ Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1, Tegra::SubmissionMode::Increasing), {}}; - const std::vector increment{BuildIncrementCommandList(fence)}; - - result.insert(result.end(), increment.begin(), increment.end()); - + auto increment_list{BuildIncrementCommandList(fence)}; + result.insert(result.end(), increment_list.begin(), increment_list.end()); return result; } -NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector& output, +NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::span output, Tegra::CommandList&& entries) { LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, params.num_entries, params.flags.raw); @@ -293,7 +294,7 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector return NvResult::Success; } -NvResult nvhost_gpu::SubmitGPFIFOBase(std::span input, std::vector& output, +NvResult nvhost_gpu::SubmitGPFIFOBase(std::span input, std::span output, bool kickoff) { if (input.size() < sizeof(IoctlSubmitGpfifo)) { UNIMPLEMENTED(); @@ -315,7 +316,7 @@ NvResult nvhost_gpu::SubmitGPFIFOBase(std::span input, std::vector } NvResult 
nvhost_gpu::SubmitGPFIFOBase(std::span input, std::span input_inline, - std::vector& output) { + std::span output) { if (input.size() < sizeof(IoctlSubmitGpfifo)) { UNIMPLEMENTED(); return NvResult::InvalidSize; @@ -327,7 +328,7 @@ NvResult nvhost_gpu::SubmitGPFIFOBase(std::span input, std::span input, std::vector& output) { +NvResult nvhost_gpu::GetWaitbase(std::span input, std::span output) { IoctlGetWaitbase params{}; std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase)); LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); @@ -337,7 +338,7 @@ NvResult nvhost_gpu::GetWaitbase(std::span input, std::vector& out return NvResult::Success; } -NvResult nvhost_gpu::ChannelSetTimeout(std::span input, std::vector& output) { +NvResult nvhost_gpu::ChannelSetTimeout(std::span input, std::span output) { IoctlChannelSetTimeout params{}; std::memcpy(&params, input.data(), sizeof(IoctlChannelSetTimeout)); LOG_INFO(Service_NVDRV, "called, timeout=0x{:X}", params.timeout); @@ -345,7 +346,7 @@ NvResult nvhost_gpu::ChannelSetTimeout(std::span input, std::vector input, std::vector& output) { +NvResult nvhost_gpu::ChannelSetTimeslice(std::span input, std::span output) { IoctlSetTimeslice params{}; std::memcpy(&params, input.data(), sizeof(IoctlSetTimeslice)); LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 3ca58202d..529c20526 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h @@ -41,11 +41,11 @@ public: ~nvhost_gpu() override; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) override; + std::span output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::vector& output, - std::vector&
inline_output) override; + std::span inline_input, std::span output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::span output, + std::span inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; @@ -186,23 +186,23 @@ private: u32_le channel_priority{}; u32_le channel_timeslice{}; - NvResult SetNVMAPfd(std::span input, std::vector& output); - NvResult SetClientData(std::span input, std::vector& output); - NvResult GetClientData(std::span input, std::vector& output); - NvResult ZCullBind(std::span input, std::vector& output); - NvResult SetErrorNotifier(std::span input, std::vector& output); - NvResult SetChannelPriority(std::span input, std::vector& output); - NvResult AllocGPFIFOEx2(std::span input, std::vector& output); - NvResult AllocateObjectContext(std::span input, std::vector& output); - NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector& output, + NvResult SetNVMAPfd(std::span input, std::span output); + NvResult SetClientData(std::span input, std::span output); + NvResult GetClientData(std::span input, std::span output); + NvResult ZCullBind(std::span input, std::span output); + NvResult SetErrorNotifier(std::span input, std::span output); + NvResult SetChannelPriority(std::span input, std::span output); + NvResult AllocGPFIFOEx2(std::span input, std::span output); + NvResult AllocateObjectContext(std::span input, std::span output); + NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::span output, Tegra::CommandList&& entries); - NvResult SubmitGPFIFOBase(std::span input, std::vector& output, + NvResult SubmitGPFIFOBase(std::span input, std::span output, bool kickoff = false); NvResult SubmitGPFIFOBase(std::span input, std::span input_inline, - std::vector& output); - NvResult GetWaitbase(std::span input, std::vector& output); - NvResult ChannelSetTimeout(std::span input, std::vector& output); - NvResult ChannelSetTimeslice(std::span input, std::vector& output); + 
std::span output); + NvResult GetWaitbase(std::span input, std::span output); + NvResult ChannelSetTimeout(std::span input, std::span output); + NvResult ChannelSetTimeslice(std::span input, std::span output); EventInterface& events_interface; NvCore::Container& core; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index dc45169ad..a174442a6 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -16,7 +16,7 @@ nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core_) nvhost_nvdec::~nvhost_nvdec() = default; NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) { + std::span output) { switch (command.group) { case 0x0: switch (command.cmd) { @@ -56,13 +56,13 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span in } NvResult nvhost_nvdec::Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) { + std::span inline_input, std::span output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span input, - std::vector& output, std::vector& inline_output) { + std::span output, std::span inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 0d615bbcb..ad2233c49 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -14,11 +14,11 @@ public: ~nvhost_nvdec() override; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) override; + std::span output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span 
inline_input, std::vector& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::vector& output, - std::vector& inline_output) override; + std::span inline_input, std::span output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::span output, + std::span inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 1ab51f10b..61649aa4a 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -36,7 +36,7 @@ std::size_t SliceVectors(std::span input, std::vector& dst, std::si // Writes the data in src to an offset into the dst vector. The offset is specified in bytes // Returns the number of bytes written into dst. template -std::size_t WriteVectors(std::vector& dst, const std::vector& src, std::size_t offset) { +std::size_t WriteVectors(std::span dst, const std::vector& src, std::size_t offset) { if (src.empty()) { return 0; } @@ -72,8 +72,7 @@ NvResult nvhost_nvdec_common::SetNVMAPfd(std::span input) { return NvResult::Success; } -NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span input, - std::vector& output) { +NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span input, std::span output) { IoctlSubmit params{}; std::memcpy(&params, input.data(), sizeof(IoctlSubmit)); LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); @@ -121,7 +120,7 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span input, return NvResult::Success; } -NvResult nvhost_nvdec_common::GetSyncpoint(std::span input, std::vector& output) { +NvResult nvhost_nvdec_common::GetSyncpoint(std::span input, std::span output) { IoctlGetSyncpoint params{}; std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); @@ -133,7 +132,7 @@ NvResult nvhost_nvdec_common::GetSyncpoint(std::span input, std::vecto return NvResult::Success; } -NvResult nvhost_nvdec_common::GetWaitbase(std::span input, std::vector& output) { +NvResult nvhost_nvdec_common::GetWaitbase(std::span input, std::span output) { IoctlGetWaitbase params{}; LOG_CRITICAL(Service_NVDRV, "called WAITBASE"); std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase)); @@ -142,7 +141,7 @@ NvResult nvhost_nvdec_common::GetWaitbase(std::span input, std::vector return NvResult::Success; } -NvResult nvhost_nvdec_common::MapBuffer(std::span input, std::vector& output) { +NvResult nvhost_nvdec_common::MapBuffer(std::span input, std::span output) { IoctlMapBuffer params{}; std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); std::vector cmd_buffer_handles(params.num_entries); @@ -159,7 +158,7 @@ NvResult nvhost_nvdec_common::MapBuffer(std::span input, std::vector input, std::vector& output) { +NvResult nvhost_nvdec_common::UnmapBuffer(std::span input, std::span output) { IoctlMapBuffer params{}; std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); std::vector cmd_buffer_handles(params.num_entries); @@ -173,7 +172,7 @@ NvResult nvhost_nvdec_common::UnmapBuffer(std::span input, std::vector return NvResult::Success; } -NvResult nvhost_nvdec_common::SetSubmitTimeout(std::span input, std::vector& output) { +NvResult nvhost_nvdec_common::SetSubmitTimeout(std::span input, std::span output) { std::memcpy(&submit_timeout, input.data(), input.size()); LOG_WARNING(Service_NVDRV, "(STUBBED) called"); return NvResult::Success; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index 5af26a26f..9bb573bfe 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -108,12 +108,12 @@ protected: /// Ioctl command
implementations NvResult SetNVMAPfd(std::span input); - NvResult Submit(DeviceFD fd, std::span input, std::vector& output); - NvResult GetSyncpoint(std::span input, std::vector& output); - NvResult GetWaitbase(std::span input, std::vector& output); - NvResult MapBuffer(std::span input, std::vector& output); - NvResult UnmapBuffer(std::span input, std::vector& output); - NvResult SetSubmitTimeout(std::span input, std::vector& output); + NvResult Submit(DeviceFD fd, std::span input, std::span output); + NvResult GetSyncpoint(std::span input, std::span output); + NvResult GetWaitbase(std::span input, std::span output); + NvResult MapBuffer(std::span input, std::span output); + NvResult UnmapBuffer(std::span input, std::span output); + NvResult SetSubmitTimeout(std::span input, std::span output); Kernel::KEvent* QueryEvent(u32 event_id) override; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp index 39f30e7c8..a05c8cdae 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp @@ -13,7 +13,7 @@ nvhost_nvjpg::nvhost_nvjpg(Core::System& system_) : nvdevice{system_} {} nvhost_nvjpg::~nvhost_nvjpg() = default; NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) { + std::span output) { switch (command.group) { case 'H': switch (command.cmd) { @@ -32,13 +32,13 @@ NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span in } NvResult nvhost_nvjpg::Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) { + std::span inline_input, std::span output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span input, - std::vector& output, std::vector& inline_output) { + std::span output, std::span inline_output) { 
UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } @@ -46,7 +46,7 @@ NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span in void nvhost_nvjpg::OnOpen(DeviceFD fd) {} void nvhost_nvjpg::OnClose(DeviceFD fd) {} -NvResult nvhost_nvjpg::SetNVMAPfd(std::span input, std::vector& output) { +NvResult nvhost_nvjpg::SetNVMAPfd(std::span input, std::span output) { IoctlSetNvmapFD params{}; std::memcpy(&params, input.data(), input.size()); LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h index 41b57e872..5623e0d47 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h @@ -16,11 +16,11 @@ public: ~nvhost_nvjpg() override; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) override; + std::span output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::vector& output, - std::vector& inline_output) override; + std::span inline_input, std::span output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::span output, + std::span inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; @@ -33,7 +33,7 @@ private: s32_le nvmap_fd{}; - NvResult SetNVMAPfd(std::span input, std::vector& output); + NvResult SetNVMAPfd(std::span input, std::span output); }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index b0ea402a7..c0b8684c3 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -16,7 +16,7 @@ nvhost_vic::nvhost_vic(Core::System&
system_, NvCore::Container& core_) nvhost_vic::~nvhost_vic() = default; NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) { + std::span output) { switch (command.group) { case 0x0: switch (command.cmd) { @@ -56,13 +56,13 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span inpu } NvResult nvhost_vic::Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) { + std::span inline_input, std::span output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span input, - std::vector& output, std::vector& inline_output) { + std::span output, std::span inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index b5e350a83..cadbcb0a5 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h @@ -13,11 +13,11 @@ public: ~nvhost_vic(); NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) override; + std::span output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::vector& output, - std::vector& inline_output) override; + std::span inline_input, std::span output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::span output, + std::span inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp index 07417f045..e7f7e273b 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.cpp +++ 
b/src/core/hle/service/nvdrv/devices/nvmap.cpp @@ -26,7 +26,7 @@ nvmap::nvmap(Core::System& system_, NvCore::Container& container_) nvmap::~nvmap() = default; NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) { + std::span output) { switch (command.group) { case 0x1: switch (command.cmd) { @@ -55,13 +55,13 @@ NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span input, } NvResult nvmap::Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) { + std::span inline_input, std::span output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } -NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span input, - std::vector& output, std::vector& inline_output) { +NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::span output, + std::span inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } @@ -69,7 +69,7 @@ NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span input, void nvmap::OnOpen(DeviceFD fd) {} void nvmap::OnClose(DeviceFD fd) {} -NvResult nvmap::IocCreate(std::span input, std::vector& output) { +NvResult nvmap::IocCreate(std::span input, std::span output) { IocCreateParams params; std::memcpy(&params, input.data(), sizeof(params)); LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size); @@ -89,7 +89,7 @@ NvResult nvmap::IocCreate(std::span input, std::vector& output) { return NvResult::Success; } -NvResult nvmap::IocAlloc(std::span input, std::vector& output) { +NvResult nvmap::IocAlloc(std::span input, std::span output) { IocAllocParams params; std::memcpy(&params, input.data(), sizeof(params)); LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address); @@ -137,7 +137,7 @@ NvResult nvmap::IocAlloc(std::span input, std::vector& output) { return result; } -NvResult nvmap::IocGetId(std::span input, std::vector& output) { +NvResult
nvmap::IocGetId(std::span input, std::span output) { IocGetIdParams params; std::memcpy(&params, input.data(), sizeof(params)); @@ -161,7 +161,7 @@ NvResult nvmap::IocGetId(std::span input, std::vector& output) { return NvResult::Success; } -NvResult nvmap::IocFromId(std::span input, std::vector& output) { +NvResult nvmap::IocFromId(std::span input, std::span output) { IocFromIdParams params; std::memcpy(&params, input.data(), sizeof(params)); @@ -192,7 +192,7 @@ NvResult nvmap::IocFromId(std::span input, std::vector& output) { return NvResult::Success; } -NvResult nvmap::IocParam(std::span input, std::vector& output) { +NvResult nvmap::IocParam(std::span input, std::span output) { enum class ParamTypes { Size = 1, Alignment = 2, Base = 3, Heap = 4, Kind = 5, Compr = 6 }; IocParamParams params; @@ -241,7 +241,7 @@ NvResult nvmap::IocParam(std::span input, std::vector& output) { return NvResult::Success; } -NvResult nvmap::IocFree(std::span input, std::vector& output) { +NvResult nvmap::IocFree(std::span input, std::span output) { IocFreeParams params; std::memcpy(&params, input.data(), sizeof(params)); diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h index 82bd3b118..40c65b430 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.h +++ b/src/core/hle/service/nvdrv/devices/nvmap.h @@ -27,11 +27,11 @@ public: nvmap& operator=(const nvmap&) = delete; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) override; + std::span output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::vector& output, - std::vector& inline_output) override; + std::span inline_input, std::span output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::span output, + std::span inline_output) override; void OnOpen(DeviceFD fd) override; void
OnClose(DeviceFD fd) override; @@ -106,12 +106,12 @@ private: }; static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size"); - NvResult IocCreate(std::span input, std::vector& output); - NvResult IocAlloc(std::span input, std::vector& output); - NvResult IocGetId(std::span input, std::vector& output); - NvResult IocFromId(std::span input, std::vector& output); - NvResult IocParam(std::span input, std::vector& output); - NvResult IocFree(std::span input, std::vector& output); + NvResult IocCreate(std::span input, std::span output); + NvResult IocAlloc(std::span input, std::span output); + NvResult IocGetId(std::span input, std::span output); + NvResult IocFromId(std::span input, std::span output); + NvResult IocParam(std::span input, std::span output); + NvResult IocFree(std::span input, std::span output); NvCore::Container& container; NvCore::NvMap& file; diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 3d774eec4..9e46ee8dd 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -130,7 +130,7 @@ DeviceFD Module::Open(const std::string& device_name) { } NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span input, - std::vector& output) { + std::span output) { if (fd < 0) { LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); return NvResult::InvalidState; @@ -147,7 +147,7 @@ NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span input, } NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output) { + std::span inline_input, std::span output) { if (fd < 0) { LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); return NvResult::InvalidState; @@ -163,8 +163,8 @@ NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span input, return itr->second->Ioctl2(fd, command, input, inline_input, output); } -NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, std::span input, - std::vector& output, 
std::vector& inline_output) { +NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::span output, + std::span inline_output) { if (fd < 0) { LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); return NvResult::InvalidState; diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index 668be742b..d8622b3ca 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h @@ -80,13 +80,13 @@ public: DeviceFD Open(const std::string& device_name); /// Sends an ioctl command to the specified file descriptor. - NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span input, std::vector& output); + NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span input, std::span output); NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span input, - std::span inline_input, std::vector& output); + std::span inline_input, std::span output); - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::vector& output, - std::vector& inline_output); + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span input, std::span output, + std::span inline_output); /// Closes a device file descriptor and returns operation success. 
NvResult Close(DeviceFD fd); diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp index d010a1e03..348207e25 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp +++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp @@ -63,12 +63,12 @@ void NVDRV::Ioctl1(HLERequestContext& ctx) { } // Check device - std::vector output_buffer(ctx.GetWriteBufferSize(0)); + tmp_output.resize_destructive(ctx.GetWriteBufferSize(0)); const auto input_buffer = ctx.ReadBuffer(0); - const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer); + const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output); if (command.is_out != 0) { - ctx.WriteBuffer(output_buffer); + ctx.WriteBuffer(tmp_output); } IPC::ResponseBuilder rb{ctx, 3}; @@ -90,12 +90,12 @@ void NVDRV::Ioctl2(HLERequestContext& ctx) { const auto input_buffer = ctx.ReadBuffer(0); const auto input_inlined_buffer = ctx.ReadBuffer(1); - std::vector output_buffer(ctx.GetWriteBufferSize(0)); + tmp_output.resize_destructive(ctx.GetWriteBufferSize(0)); const auto nv_result = - nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer); + nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, tmp_output); if (command.is_out != 0) { - ctx.WriteBuffer(output_buffer); + ctx.WriteBuffer(tmp_output); } IPC::ResponseBuilder rb{ctx, 3}; @@ -116,14 +116,12 @@ void NVDRV::Ioctl3(HLERequestContext& ctx) { } const auto input_buffer = ctx.ReadBuffer(0); - std::vector output_buffer(ctx.GetWriteBufferSize(0)); - std::vector output_buffer_inline(ctx.GetWriteBufferSize(1)); - - const auto nv_result = - nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline); + tmp_output.resize_destructive(ctx.GetWriteBufferSize(0)); + tmp_output_inline.resize_destructive(ctx.GetWriteBufferSize(1)); + const auto nv_result = nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output_inline); if (command.is_out != 0) { - 
ctx.WriteBuffer(output_buffer, 0); - ctx.WriteBuffer(output_buffer_inline, 1); + ctx.WriteBuffer(tmp_output, 0); + ctx.WriteBuffer(tmp_output_inline, 1); } IPC::ResponseBuilder rb{ctx, 3}; diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.h b/src/core/hle/service/nvdrv/nvdrv_interface.h index 881ea1a6b..4b593ff90 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.h +++ b/src/core/hle/service/nvdrv/nvdrv_interface.h @@ -4,6 +4,7 @@ #pragma once #include +#include "common/scratch_buffer.h" #include "core/hle/service/nvdrv/nvdrv.h" #include "core/hle/service/service.h" @@ -33,6 +34,8 @@ private: u64 pid{}; bool is_initialized{}; + Common::ScratchBuffer tmp_output; + Common::ScratchBuffer tmp_output_inline; }; } // namespace Service::Nvidia diff --git a/src/core/hle/service/nvnflinger/parcel.h b/src/core/hle/service/nvnflinger/parcel.h index fb56d75d7..23ba315a0 100644 --- a/src/core/hle/service/nvnflinger/parcel.h +++ b/src/core/hle/service/nvnflinger/parcel.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "common/alignment.h" #include "common/assert.h" @@ -167,7 +168,7 @@ public: private: template requires(std::is_trivially_copyable_v) - void WriteImpl(const T& val, std::vector& buffer) { + void WriteImpl(const T& val, boost::container::small_vector& buffer) { const size_t aligned_size = Common::AlignUp(sizeof(T), 4); const size_t old_size = buffer.size(); buffer.resize(old_size + aligned_size); @@ -176,8 +177,8 @@ private: } private: - std::vector m_data_buffer; - std::vector m_object_buffer; + boost::container::small_vector m_data_buffer; + boost::container::small_vector m_object_buffer; }; } // namespace Service::android diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp index c3c2281bb..9ff4028c2 100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp @@ -479,7 +479,7 @@ void 
EmitContext::DefineGenericOutput(size_t index, u32 invocations) { const u32 remainder{4 - element}; const TransformFeedbackVarying* xfb_varying{}; const size_t xfb_varying_index{base_index + element}; - if (xfb_varying_index < runtime_info.xfb_varyings.size()) { + if (xfb_varying_index < runtime_info.xfb_count) { xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index]; xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr; } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 0f86a8004..34592a01f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -387,7 +387,7 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr } void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) { - if (ctx.runtime_info.xfb_varyings.empty()) { + if (ctx.runtime_info.xfb_count == 0) { return; } ctx.AddCapability(spv::Capability::TransformFeedback); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index fd15f47ea..bec5db173 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -160,7 +160,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional invo const u32 remainder{4 - element}; const TransformFeedbackVarying* xfb_varying{}; const size_t xfb_varying_index{base_attr_index + element}; - if (xfb_varying_index < ctx.runtime_info.xfb_varyings.size()) { + if (xfb_varying_index < ctx.runtime_info.xfb_count) { xfb_varying = &ctx.runtime_info.xfb_varyings[xfb_varying_index]; xfb_varying = xfb_varying->components > 0 ? 
xfb_varying : nullptr; } diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 3b63c249f..619c0b138 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -84,7 +84,8 @@ struct RuntimeInfo { bool glasm_use_storage_buffers{}; /// Transform feedback state for each varying - std::vector xfb_varyings; + std::array xfb_varyings{}; + u32 xfb_count{0}; }; } // namespace Shader diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 45977d578..58a45ab67 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -207,7 +207,7 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am if (has_new_downloads) { memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); } - tmp_buffer.resize(amount); + tmp_buffer.resize_destructive(amount); cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); return true; @@ -1279,7 +1279,7 @@ template typename BufferCache

::OverlapResult BufferCache

::ResolveOverlaps(VAddr cpu_addr, u32 wanted_size) { static constexpr int STREAM_LEAP_THRESHOLD = 16; - std::vector overlap_ids; + boost::container::small_vector overlap_ids; VAddr begin = cpu_addr; VAddr end = cpu_addr + wanted_size; int stream_score = 0; diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 63a120f7a..fe6068cfe 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -229,7 +229,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches; struct OverlapResult { - std::vector ids; + boost::container::small_vector ids; VAddr begin; VAddr end; bool has_stream_leap = false; @@ -582,7 +582,7 @@ private: BufferId inline_buffer_id; std::array> CACHING_PAGEBITS)> page_table; - std::vector tmp_buffer; + Common::ScratchBuffer tmp_buffer; }; } // namespace VideoCommon diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 83112dfce..7d660af47 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h @@ -63,7 +63,6 @@ struct ChCommand { }; using ChCommandHeaderList = std::vector; -using ChCommandList = std::vector; struct ThiRegisters { u32_le increment_syncpt{}; diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 1cdb690ed..8a2784cdc 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "common/bit_field.h" @@ -102,11 +103,12 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub struct CommandList final { CommandList() = default; explicit CommandList(std::size_t size) : command_lists(size) {} - explicit CommandList(std::vector&& prefetch_command_list_) + explicit CommandList( + boost::container::small_vector&& prefetch_command_list_) : prefetch_command_list{std::move(prefetch_command_list_)} {} - std::vector command_lists; - std::vector 
prefetch_command_list; + boost::container::small_vector command_lists; + boost::container::small_vector prefetch_command_list; }; /** diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index ebe5536de..bc1eb41e7 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -108,9 +108,11 @@ void MaxwellDMA::Launch() { if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { ASSERT(regs.remap_const.component_size_minus_one == 3); accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); - std::vector tmp_buffer(regs.line_length_in, regs.remap_consta_value); + read_buffer.resize_destructive(regs.line_length_in * sizeof(u32)); + std::span span(reinterpret_cast(read_buffer.data()), regs.line_length_in); + std::ranges::fill(span, regs.remap_consta_value); memory_manager.WriteBlockUnsafe(regs.offset_out, - reinterpret_cast(tmp_buffer.data()), + reinterpret_cast(read_buffer.data()), regs.line_length_in * sizeof(u32)); } else { memory_manager.FlushCaching(); @@ -126,32 +128,32 @@ void MaxwellDMA::Launch() { UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); - std::vector tmp_buffer(16); + read_buffer.resize_destructive(16); for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { memory_manager.ReadBlockUnsafe( convert_linear_2_blocklinear_addr(regs.offset_in + offset), - tmp_buffer.data(), tmp_buffer.size()); - memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), - tmp_buffer.size()); + read_buffer.data(), read_buffer.size()); + memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(), + read_buffer.size()); } } else if (is_src_pitch && !is_dst_pitch) { UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); - std::vector tmp_buffer(16); + 
read_buffer.resize_destructive(16); for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { - memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), - tmp_buffer.size()); + memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), + read_buffer.size()); memory_manager.WriteBlockCached( convert_linear_2_blocklinear_addr(regs.offset_out + offset), - tmp_buffer.data(), tmp_buffer.size()); + read_buffer.data(), read_buffer.size()); } } else { if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { - std::vector tmp_buffer(regs.line_length_in); - memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), + read_buffer.resize_destructive(regs.line_length_in); + memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), regs.line_length_in); - memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), + memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(), regs.line_length_in); } } @@ -171,7 +173,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { src_operand.address = regs.offset_in; DMA::BufferOperand dst_operand; - dst_operand.pitch = regs.pitch_out; + u32 abs_pitch_out = std::abs(static_cast(regs.pitch_out)); + dst_operand.pitch = abs_pitch_out; dst_operand.width = regs.line_length_in; dst_operand.height = regs.line_count; dst_operand.address = regs.offset_out; @@ -218,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { const size_t src_size = CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); - const size_t dst_size = static_cast(regs.pitch_out) * regs.line_count; + const size_t dst_size = static_cast(abs_pitch_out) * regs.line_count; read_buffer.resize_destructive(src_size); write_buffer.resize_destructive(dst_size); @@ -227,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, src_params.origin.y, x_elements, 
regs.line_count, block_height, block_depth, - regs.pitch_out); + abs_pitch_out); memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index 6ce179167..ce827eb6c 100644 --- a/src/video_core/host1x/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp @@ -4,6 +4,7 @@ #include #include +#include "common/scratch_buffer.h" #include "common/settings.h" #include "video_core/host1x/codecs/h264.h" #include "video_core/host1x/host1x.h" @@ -188,7 +189,8 @@ void H264BitWriter::WriteBit(bool state) { } void H264BitWriter::WriteScalingList(std::span list, s32 start, s32 count) { - std::vector scan(count); + static Common::ScratchBuffer scan{}; + scan.resize_destructive(count); if (count == 16) { std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); } else { diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index b2f7e160a..45141e488 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -587,7 +587,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, VideoCommon::CacheType which) { - std::vector tmp_buffer(size); + tmp_buffer.resize_destructive(size); ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); // The output block must be flushed in case it has data modified from the GPU. 
@@ -670,9 +670,9 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons return result; } -std::vector> MemoryManager::GetSubmappedRange( - GPUVAddr gpu_addr, std::size_t size) const { - std::vector> result{}; +boost::container::small_vector, 32> +MemoryManager::GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const { + boost::container::small_vector, 32> result{}; GetSubmappedRangeImpl(gpu_addr, size, result); return result; } @@ -680,8 +680,9 @@ std::vector> MemoryManager::GetSubmappedRange( template void MemoryManager::GetSubmappedRangeImpl( GPUVAddr gpu_addr, std::size_t size, - std::vector, std::size_t>>& - result) const { + boost::container::small_vector< + std::pair, std::size_t>, 32>& result) + const { std::optional, std::size_t>> last_segment{}; std::optional old_page_addr{}; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 794535122..4202c26ff 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -8,10 +8,12 @@ #include #include #include +#include #include "common/common_types.h" #include "common/multi_level_page_table.h" #include "common/range_map.h" +#include "common/scratch_buffer.h" #include "common/virtual_buffer.h" #include "video_core/cache_types.h" #include "video_core/pte_kind.h" @@ -107,8 +109,8 @@ public: * if the region is continuous, a single pair will be returned. 
If it's unmapped, an empty * vector will be returned; */ - std::vector> GetSubmappedRange(GPUVAddr gpu_addr, - std::size_t size) const; + boost::container::small_vector, 32> GetSubmappedRange( + GPUVAddr gpu_addr, std::size_t size) const; GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); @@ -165,7 +167,8 @@ private: template void GetSubmappedRangeImpl( GPUVAddr gpu_addr, std::size_t size, - std::vector, std::size_t>>& + boost::container::small_vector< + std::pair, std::size_t>, 32>& result) const; Core::System& system; @@ -215,8 +218,8 @@ private: Common::VirtualBuffer big_page_table_cpu; std::vector big_page_continuous; - std::vector> page_stash{}; - std::vector> page_stash2{}; + boost::container::small_vector, 32> page_stash{}; + boost::container::small_vector, 32> page_stash2{}; mutable std::mutex guard; @@ -226,6 +229,8 @@ private: std::unique_ptr accumulator; static std::atomic unique_identifier_generator; + + Common::ScratchBuffer tmp_buffer; }; } // namespace Tegra diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3f077311e..0329ed820 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, case Shader::Stage::VertexB: case Shader::Stage::Geometry: if (!use_assembly_shaders && key.xfb_enabled != 0) { - info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); + auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); + info.xfb_varyings = varyings; + info.xfb_count = count; } break; case Shader::Stage::TessellationEval: diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index e30fcb1ed..f47301ad5 100644 --- 
a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -361,7 +361,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, }; // Measuring a popular game, this number never exceeds the specified size once data is warmed up - boost::container::small_vector vk_copies(copies.size()); + boost::container::small_vector vk_copies(copies.size()); std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a2cfb2105..9f316113c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -167,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span program info.fixed_state_point_size = point_size; } if (key.state.xfb_enabled) { - info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + auto [varyings, count] = + VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + info.xfb_varyings = varyings; + info.xfb_count = count; } info.convert_depth_mode = gl_ndc; } @@ -214,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span program info.fixed_state_point_size = point_size; } if (key.state.xfb_enabled != 0) { - info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + auto [varyings, count] = + VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + info.xfb_varyings = varyings; + info.xfb_count = count; } info.convert_depth_mode = gl_ndc; break; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index f025f618b..f3cef09dd 100644 --- 
a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -330,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } -[[maybe_unused]] [[nodiscard]] std::vector TransformBufferCopies( - std::span copies, size_t buffer_offset) { - std::vector result(copies.size()); +[[maybe_unused]] [[nodiscard]] boost::container::small_vector +TransformBufferCopies(std::span copies, size_t buffer_offset) { + boost::container::small_vector result(copies.size()); std::ranges::transform( copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { return VkBufferCopy{ @@ -344,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { return result; } -[[nodiscard]] std::vector TransformBufferImageCopies( +[[nodiscard]] boost::container::small_vector TransformBufferImageCopies( std::span copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { struct Maker { VkBufferImageCopy operator()(const BufferImageCopy& copy) const { @@ -377,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { VkImageAspectFlags aspect_mask; }; if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - std::vector result(copies.size() * 2); + boost::container::small_vector result(copies.size() * 2); std::ranges::transform(copies, result.begin(), Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); std::ranges::transform(copies, result.begin() + copies.size(), Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); return result; } else { - std::vector result(copies.size()); + boost::container::small_vector result(copies.size()); std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); return result; } @@ -867,8 +867,8 @@ void TextureCacheRuntime::BarrierFeedbackLoop() { void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, std::span copies) { - std::vector vk_in_copies(copies.size()); - 
std::vector vk_out_copies(copies.size()); + boost::container::small_vector vk_in_copies(copies.size()); + boost::container::small_vector vk_out_copies(copies.size()); const VkImageAspectFlags src_aspect_mask = src.AspectMask(); const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); @@ -1157,7 +1157,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im void TextureCacheRuntime::CopyImage(Image& dst, Image& src, std::span copies) { - std::vector vk_copies(copies.size()); + boost::container::small_vector vk_copies(copies.size()); const VkImageAspectFlags aspect_mask = dst.AspectMask(); ASSERT(aspect_mask == src.AspectMask()); @@ -1332,7 +1332,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset, ScaleDown(true); } scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); + auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); const VkBuffer src_buffer = buffer; const VkImage vk_image = *original_image; const VkImageAspectFlags vk_aspect_mask = aspect_mask; @@ -1367,8 +1367,9 @@ void Image::DownloadMemory(std::span buffers_span, std::span buffers_vector{}; - boost::container::small_vector, 1> vk_copies; + boost::container::small_vector buffers_vector{}; + boost::container::small_vector, 8> + vk_copies; for (size_t index = 0; index < buffers_span.size(); index++) { buffers_vector.emplace_back(buffers_span[index]); vk_copies.emplace_back( @@ -1858,7 +1859,7 @@ Framebuffer::~Framebuffer() = default; void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, std::span color_buffers, ImageView* depth_buffer, bool is_rescaled) { - std::vector attachments; + boost::container::small_vector attachments; RenderPassKey renderpass_key{}; s32 num_layers = 1; diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index c5213875b..4db948b6d 100644 --- a/src/video_core/shader_cache.cpp +++ 
b/src/video_core/shader_cache.cpp @@ -151,11 +151,9 @@ void ShaderCache::RemovePendingShaders() { marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), marked_for_removal.end()); - std::vector removed_shaders; - removed_shaders.reserve(marked_for_removal.size()); + boost::container::small_vector removed_shaders; std::scoped_lock lock{lookup_mutex}; - for (Entry* const entry : marked_for_removal) { removed_shaders.push_back(entry->data); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 1b8a17ee8..55d49d017 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "common/common_funcs.h" #include "common/common_types.h" @@ -108,8 +109,8 @@ struct ImageBase { std::vector image_view_infos; std::vector image_view_ids; - std::vector slice_offsets; - std::vector slice_subresources; + boost::container::small_vector slice_offsets; + boost::container::small_vector slice_subresources; std::vector aliased_images; std::vector overlapping_images; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d58bb69ff..d3f03a995 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -526,7 +526,7 @@ void TextureCache

::WriteMemory(VAddr cpu_addr, size_t size) { template void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { - std::vector images; + boost::container::small_vector images; ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { if (!image.IsSafeDownload()) { return; @@ -579,7 +579,7 @@ std::optional TextureCache

::GetFlushArea(V template void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { - std::vector deleted_images; + boost::container::small_vector deleted_images; ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { Image& image = slot_images[id]; @@ -593,7 +593,7 @@ void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { template void TextureCache

::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { - std::vector deleted_images; + boost::container::small_vector deleted_images; ForEachImageInRegionGPU(as_id, gpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { @@ -1101,7 +1101,7 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, const bool native_bgr = runtime.HasNativeBgr(); const bool flexible_formats = True(options & RelaxedOptions::Format); ImageId image_id{}; - boost::container::small_vector image_ids; + boost::container::small_vector image_ids; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { if (True(existing_image.flags & ImageFlagBits::Remapped)) { return false; @@ -1622,7 +1622,7 @@ ImageId TextureCache

::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) } } ImageId image_id{}; - boost::container::small_vector image_ids; + boost::container::small_vector image_ids; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { if (True(existing_image.flags & ImageFlagBits::Remapped)) { return false; @@ -1942,7 +1942,7 @@ void TextureCache

::RegisterImage(ImageId image_id) { image.map_view_id = map_id; return; } - std::vector sparse_maps{}; + boost::container::small_vector sparse_maps; ForEachSparseSegment( image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); @@ -2217,7 +2217,7 @@ void TextureCache

::MarkModification(ImageBase& image) noexcept { template void TextureCache

::SynchronizeAliases(ImageId image_id) { - boost::container::small_vector aliased_images; + boost::container::small_vector aliased_images; Image& image = slot_images[image_id]; bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); bool any_modified = True(image.flags & ImageFlagBits::GpuModified); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 44232b961..e9ec91265 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -56,7 +56,7 @@ struct ImageViewInOut { struct AsyncDecodeContext { ImageId image_id; Common::ScratchBuffer decoded_data; - std::vector copies; + boost::container::small_vector copies; std::mutex mutex; std::atomic_bool complete; }; @@ -429,7 +429,7 @@ private: std::unordered_map, Common::IdentityHash> page_table; std::unordered_map, Common::IdentityHash> sparse_page_table; - std::unordered_map> sparse_views; + std::unordered_map> sparse_views; VAddr virtual_invalid_space{}; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 95a5b47d8..f781cb7a0 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -329,13 +329,13 @@ template [[nodiscard]] std::optional ResolveOverlapRightAddress3D( const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { - const std::vector slice_offsets = CalculateSliceOffsets(new_info); + const auto slice_offsets = CalculateSliceOffsets(new_info); const u32 diff = static_cast(overlap.gpu_addr - gpu_addr); const auto it = std::ranges::find(slice_offsets, diff); if (it == slice_offsets.end()) { return std::nullopt; } - const std::vector subresources = CalculateSliceSubresources(new_info); + const auto subresources = CalculateSliceSubresources(new_info); const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; const ImageInfo& info = 
overlap.info; if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { @@ -655,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { return sizes; } -std::vector CalculateSliceOffsets(const ImageInfo& info) { +boost::container::small_vector CalculateSliceOffsets(const ImageInfo& info) { ASSERT(info.type == ImageType::e3D); - std::vector offsets; + boost::container::small_vector offsets; offsets.reserve(NumSlices(info)); const LevelInfo level_info = MakeLevelInfo(info); @@ -679,9 +679,10 @@ std::vector CalculateSliceOffsets(const ImageInfo& info) { return offsets; } -std::vector CalculateSliceSubresources(const ImageInfo& info) { +boost::container::small_vector CalculateSliceSubresources( + const ImageInfo& info) { ASSERT(info.type == ImageType::e3D); - std::vector subresources; + boost::container::small_vector subresources; subresources.reserve(NumSlices(info)); for (s32 level = 0; level < info.resources.levels; ++level) { const s32 depth = AdjustMipSize(info.size.depth, level); @@ -723,8 +724,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { } } -std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, - SubresourceBase base, u32 up_scale, u32 down_shift) { +boost::container::small_vector MakeShrinkImageCopies(const ImageInfo& dst, + const ImageInfo& src, + SubresourceBase base, + u32 up_scale, u32 down_shift) { ASSERT(dst.resources.levels >= src.resources.levels); const bool is_dst_3d = dst.type == ImageType::e3D; @@ -733,7 +736,7 @@ std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn ASSERT(src.resources.levels == 1); } const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; - std::vector copies; + boost::container::small_vector copies; copies.reserve(src.resources.levels); for (s32 level = 0; level < src.resources.levels; ++level) { ImageCopy& copy = copies.emplace_back(); @@ -770,9 +773,10 @@ std::vector 
MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn return copies; } -std::vector MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale, - u32 down_shift) { - std::vector copies; +boost::container::small_vector MakeReinterpretImageCopies(const ImageInfo& src, + u32 up_scale, + u32 down_shift) { + boost::container::small_vector copies; copies.reserve(src.resources.levels); const bool is_3d = src.type == ImageType::e3D; for (s32 level = 0; level < src.resources.levels; ++level) { @@ -824,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); } -std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, - const ImageInfo& info, std::span input, - std::span output) { +boost::container::small_vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, + GPUVAddr gpu_addr, + const ImageInfo& info, + std::span input, + std::span output) { const size_t guest_size_bytes = input.size_bytes(); const u32 bpp_log2 = BytesPerBlockLog2(info.format); const Extent3D size = info.size; @@ -861,7 +867,7 @@ std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP info.tile_width_spacing); size_t guest_offset = 0; u32 host_offset = 0; - std::vector copies(num_levels); + boost::container::small_vector copies(num_levels); for (s32 level = 0; level < num_levels; ++level) { const Extent3D level_size = AdjustMipSize(size, level); @@ -978,7 +984,7 @@ void ConvertImage(std::span input, const ImageInfo& info, std::span FullDownloadCopies(const ImageInfo& info) { +boost::container::small_vector FullDownloadCopies(const ImageInfo& info) { const Extent3D size = info.size; const u32 bytes_per_block = BytesPerBlock(info.format); if (info.type == ImageType::Linear) { @@ -1006,7 +1012,7 @@ std::vector FullDownloadCopies(const ImageInfo& info) { u32 host_offset = 0; - std::vector copies(num_levels); + boost::container::small_vector copies(num_levels); 
for (s32 level = 0; level < num_levels; ++level) { const Extent3D level_size = AdjustMipSize(size, level); const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); @@ -1042,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) { return AdjustMipBlockSize(num_tiles, level_info.block, level); } -std::vector FullUploadSwizzles(const ImageInfo& info) { +boost::container::small_vector FullUploadSwizzles(const ImageInfo& info) { const Extent2D tile_size = DefaultBlockSize(info.format); if (info.type == ImageType::Linear) { - return std::vector{SwizzleParameters{ + return {SwizzleParameters{ .num_tiles = AdjustTileSize(info.size, tile_size), .block = {}, .buffer_offset = 0, @@ -1057,7 +1063,7 @@ std::vector FullUploadSwizzles(const ImageInfo& info) { const s32 num_levels = info.resources.levels; u32 guest_offset = 0; - std::vector params(num_levels); + boost::container::small_vector params(num_levels); for (s32 level = 0; level < num_levels; ++level) { const Extent3D level_size = AdjustMipSize(size, level); const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 84aa6880d..ab45a43c4 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -5,6 +5,7 @@ #include #include +#include #include "common/common_types.h" #include "common/scratch_buffer.h" @@ -40,9 +41,10 @@ struct OverlapResult { [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; -[[nodiscard]] std::vector CalculateSliceOffsets(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector CalculateSliceOffsets(const ImageInfo& info); -[[nodiscard]] std::vector CalculateSliceSubresources(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector CalculateSliceSubresources( + const ImageInfo& info); [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); @@ -51,21 +53,18 @@ 
struct OverlapResult { [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; -[[nodiscard]] std::vector MakeShrinkImageCopies(const ImageInfo& dst, - const ImageInfo& src, - SubresourceBase base, u32 up_scale = 1, - u32 down_shift = 0); +[[nodiscard]] boost::container::small_vector MakeShrinkImageCopies( + const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1, + u32 down_shift = 0); -[[nodiscard]] std::vector MakeReinterpretImageCopies(const ImageInfo& src, - u32 up_scale = 1, - u32 down_shift = 0); +[[nodiscard]] boost::container::small_vector MakeReinterpretImageCopies( + const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0); [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); -[[nodiscard]] std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, - GPUVAddr gpu_addr, const ImageInfo& info, - std::span input, - std::span output); +[[nodiscard]] boost::container::small_vector UnswizzleImage( + Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, + std::span input, std::span output); [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageBase& image, std::span output); @@ -73,13 +72,15 @@ struct OverlapResult { void ConvertImage(std::span input, const ImageInfo& info, std::span output, std::span copies); -[[nodiscard]] std::vector FullDownloadCopies(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector FullDownloadCopies( + const ImageInfo& info); [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); -[[nodiscard]] std::vector FullUploadSwizzles(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector FullUploadSwizzles( + const ImageInfo& info); void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, std::span copies, std::span memory, 
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp index 155599316..1f353d2df 100644 --- a/src/video_core/transform_feedback.cpp +++ b/src/video_core/transform_feedback.cpp @@ -13,7 +13,7 @@ namespace VideoCommon { -std::vector MakeTransformFeedbackVaryings( +std::pair, u32> MakeTransformFeedbackVaryings( const TransformFeedbackState& state) { static constexpr std::array VECTORS{ 28U, // gl_Position @@ -62,7 +62,8 @@ std::vector MakeTransformFeedbackVaryings( 216U, // gl_TexCoord[6] 220U, // gl_TexCoord[7] }; - std::vector xfb(256); + std::array xfb{}; + u32 count{0}; for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) { const auto& locations = state.varyings[buffer]; const auto& layout = state.layouts[buffer]; @@ -103,11 +104,12 @@ std::vector MakeTransformFeedbackVaryings( } } xfb[attribute] = varying; + count = std::max(count, attribute); highest = std::max(highest, (base_offset + varying.components) * 4); } UNIMPLEMENTED_IF(highest != layout.stride); } - return xfb; + return {xfb, count + 1}; } } // namespace VideoCommon diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h index d13eb16c3..401b1352a 100644 --- a/src/video_core/transform_feedback.h +++ b/src/video_core/transform_feedback.h @@ -24,7 +24,7 @@ struct TransformFeedbackState { varyings; }; -std::vector MakeTransformFeedbackVaryings( +std::pair, u32> MakeTransformFeedbackVaryings( const TransformFeedbackState& state); } // namespace VideoCommon diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index fa9cde75b..b11abe311 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -316,6 +316,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, std::vector ExtensionListForVulkan( const std::set>& extensions) { std::vector output; + output.reserve(extensions.size()); for (const auto& 
extension : extensions) { output.push_back(extension.c_str()); } -- cgit v1.2.3