From 2da54f6e737aa27aa7d8475085b640f0a739f0d9 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Thu, 8 Jul 2021 19:29:20 +0200 Subject: [PATCH] early-access version 1860 --- README.md | 2 +- src/audio_core/audio_renderer.cpp | 30 +-- src/audio_core/command_generator.cpp | 95 +++++--- src/audio_core/command_generator.h | 23 +- src/audio_core/sink_context.cpp | 15 +- src/audio_core/sink_context.h | 2 - src/video_core/buffer_cache/buffer_cache.h | 1 - src/video_core/engines/shader_bytecode.h | 4 - .../renderer_opengl/gl_arb_decompiler.cpp | 82 ++----- .../renderer_opengl/gl_shader_decompiler.cpp | 146 ++---------- .../renderer_vulkan/vk_shader_decompiler.cpp | 115 +++------ src/video_core/shader/control_flow.cpp | 159 ++++--------- src/video_core/shader/control_flow.h | 13 +- src/video_core/shader/decode.cpp | 225 ++++++------------ src/video_core/shader/decode/other.cpp | 11 - src/video_core/shader/decode/texture.cpp | 4 +- src/video_core/shader/node.h | 15 +- src/video_core/shader/node_helper.cpp | 5 - src/video_core/shader/node_helper.h | 3 - src/video_core/shader/shader_ir.h | 95 ++------ 20 files changed, 314 insertions(+), 731 deletions(-) diff --git a/README.md b/README.md index 42f6be0ac..ae29363e6 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1859. +This is the source code for early-access 1860. 
## Legal Notice diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index ccd5ca6cc..7dba739b4 100755 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp @@ -29,10 +29,9 @@ namespace { (static_cast(r_channel) * r_mix_amount))); } -[[nodiscard]] static constexpr std::tuple Mix6To2(s16 fl_channel, s16 fr_channel, - s16 fc_channel, - [[maybe_unused]] s16 lf_channel, - s16 bl_channel, s16 br_channel) { +[[maybe_unused, nodiscard]] static constexpr std::tuple Mix6To2( + s16 fl_channel, s16 fr_channel, s16 fc_channel, [[maybe_unused]] s16 lf_channel, s16 bl_channel, + s16 br_channel) { // Front channels are mixed 36.94%, Center channels are mixed to be 26.12% & the back channels // are mixed to be 36.94% @@ -57,11 +56,11 @@ namespace { const std::array& coeff) { const auto left = static_cast(fl_channel) * coeff[0] + static_cast(fc_channel) * coeff[1] + - static_cast(lf_channel) * coeff[2] + static_cast(bl_channel) * coeff[0]; + static_cast(lf_channel) * coeff[2] + static_cast(bl_channel) * coeff[3]; const auto right = static_cast(fr_channel) * coeff[0] + static_cast(fc_channel) * coeff[1] + - static_cast(lf_channel) * coeff[2] + static_cast(br_channel) * coeff[0]; + static_cast(lf_channel) * coeff[2] + static_cast(br_channel) * coeff[3]; return {ClampToS16(static_cast(left)), ClampToS16(static_cast(right))}; } @@ -241,7 +240,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) { const auto channel_count = buffer_offsets.size(); const auto& final_mix = mix_context.GetFinalMixInfo(); const auto& in_params = final_mix.GetInParams(); - std::vector mix_buffers(channel_count); + std::vector> mix_buffers(channel_count); for (std::size_t i = 0; i < channel_count; i++) { mix_buffers[i] = command_generator.GetMixBuffer(in_params.buffer_offset + buffer_offsets[i]); @@ -294,18 +293,11 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) { buffer[i * stream_channel_count + 0] = Mix2To1(fl_sample, fr_sample); } else 
if (stream_channel_count == 2) { // Mix all channels into 2 channels - if (sink_context.HasDownMixingCoefficients()) { - const auto [left, right] = Mix6To2WithCoefficients( - fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample, - sink_context.GetDownmixCoefficients()); - buffer[i * stream_channel_count + 0] = left; - buffer[i * stream_channel_count + 1] = right; - } else { - const auto [left, right] = Mix6To2(fl_sample, fr_sample, fc_sample, - lf_sample, bl_sample, br_sample); - buffer[i * stream_channel_count + 0] = left; - buffer[i * stream_channel_count + 1] = right; - } + const auto [left, right] = Mix6To2WithCoefficients( + fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample, + sink_context.GetDownmixCoefficients()); + buffer[i * stream_channel_count + 0] = left; + buffer[i * stream_channel_count + 1] = right; } else if (stream_channel_count == 6) { // Pass through buffer[i * stream_channel_count + 0] = fl_sample; diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp index b3250be09..3b28806b2 100755 --- a/src/audio_core/command_generator.cpp +++ b/src/audio_core/command_generator.cpp @@ -31,7 +31,7 @@ constexpr std::array EARLY_GAIN{ 0.72867f, 0.69794f, 0.5464f, 0.24563f, 0.45214f, 0.44042f}; template -void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) { +void ApplyMix(std::span output, std::span input, s32 gain, s32 sample_count) { for (std::size_t i = 0; i < static_cast(sample_count); i += N) { for (std::size_t j = 0; j < N; j++) { output[i + j] += @@ -40,7 +40,17 @@ void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) { } } -s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sample_count) { +s32 ApplyMixRamp(std::span output, std::span input, float gain, float delta, + s32 sample_count) { + // XC2 passes in NaN mix volumes, causing further issues as we handle everything as s32 rather + // than float, so the NaN propagation is
lost. As the samples get further modified for + // volume etc, they can get out of NaN range, so a later heuristic for catching this is + // more difficult. Handle that here by setting these samples to silence. + if (std::isnan(gain)) { + gain = 0.0f; + delta = 0.0f; + } + s32 x = 0; for (s32 i = 0; i < sample_count; i++) { x = static_cast(static_cast(input[i]) * gain); @@ -50,20 +60,22 @@ s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sam return x; } -void ApplyGain(s32* output, const s32* input, s32 gain, s32 delta, s32 sample_count) { +void ApplyGain(std::span output, std::span input, s32 gain, s32 delta, + s32 sample_count) { for (s32 i = 0; i < sample_count; i++) { output[i] = static_cast((static_cast(input[i]) * gain + 0x4000) >> 15); gain += delta; } } -void ApplyGainWithoutDelta(s32* output, const s32* input, s32 gain, s32 sample_count) { +void ApplyGainWithoutDelta(std::span output, std::span input, s32 gain, + s32 sample_count) { for (s32 i = 0; i < sample_count; i++) { output[i] = static_cast((static_cast(input[i]) * gain + 0x4000) >> 15); } } -s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) { +s32 ApplyMixDepop(std::span output, s32 first_sample, s32 delta, s32 sample_count) { const bool positive = first_sample > 0; auto final_sample = std::abs(first_sample); for (s32 i = 0; i < sample_count; i++) { @@ -128,10 +140,10 @@ constexpr std::array REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 3, 3, 3}; template -void ApplyReverbGeneric(I3dl2ReverbState& state, - const std::array& input, - const std::array& output, - s32 sample_count) { +void ApplyReverbGeneric( + I3dl2ReverbState& state, + const std::array, AudioCommon::MAX_CHANNEL_COUNT>& input, + const std::array, AudioCommon::MAX_CHANNEL_COUNT>& output, s32 sample_count) { auto GetTapLookup = []() { if constexpr (CHANNEL_COUNT == 1) { @@ -457,8 +469,8 @@ void CommandGenerator::GenerateBiquadFilterCommand([[maybe_unused]] s32 
mix_buff "input_mix_buffer={}, output_mix_buffer={}", node_id, input_offset, output_offset); } - const auto* input = GetMixBuffer(input_offset); - auto* output = GetMixBuffer(output_offset); + std::span input = GetMixBuffer(input_offset); + std::span output = GetMixBuffer(output_offset); // Biquad filter parameters const auto [n0, n1, n2] = params.numerator; @@ -551,8 +563,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E return; } - std::array input{}; - std::array output{}; + std::array, AudioCommon::MAX_CHANNEL_COUNT> input{}; + std::array, AudioCommon::MAX_CHANNEL_COUNT> output{}; const auto status = params.status; for (s32 i = 0; i < channel_count; i++) { @@ -587,7 +599,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E for (s32 i = 0; i < channel_count; i++) { // Only copy if the buffer input and output do not match! if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) { - std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32)); + std::memcpy(output[i].data(), input[i].data(), + worker_params.sample_count * sizeof(s32)); } } } @@ -603,8 +616,8 @@ void CommandGenerator::GenerateBiquadFilterEffectCommand(s32 mix_buffer_offset, for (s32 i = 0; i < channel_count; i++) { // TODO(ogniK): Actually implement biquad filter if (params.input[i] != params.output[i]) { - const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]); - auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]); + std::span input = GetMixBuffer(mix_buffer_offset + params.input[i]); + std::span output = GetMixBuffer(mix_buffer_offset + params.output[i]); ApplyMix<1>(output, input, 32768, worker_params.sample_count); } } @@ -643,14 +656,15 @@ void CommandGenerator::GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* inf if (samples_read != static_cast(worker_params.sample_count) && samples_read <= params.sample_count) { - 
std::memset(GetMixBuffer(output_index), 0, params.sample_count - samples_read); + std::memset(GetMixBuffer(output_index).data(), 0, + params.sample_count - samples_read); } } else { AuxInfoDSP empty{}; memory.WriteBlock(aux->GetSendInfo(), &empty, sizeof(AuxInfoDSP)); memory.WriteBlock(aux->GetRecvInfo(), &empty, sizeof(AuxInfoDSP)); if (output_index != input_index) { - std::memcpy(GetMixBuffer(output_index), GetMixBuffer(input_index), + std::memcpy(GetMixBuffer(output_index).data(), GetMixBuffer(input_index).data(), worker_params.sample_count * sizeof(s32)); } } @@ -668,7 +682,7 @@ ServerSplitterDestinationData* CommandGenerator::GetDestinationData(s32 splitter } s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, - const s32* data, u32 sample_count, u32 write_offset, + std::span data, u32 sample_count, u32 write_offset, u32 write_count) { if (max_samples == 0) { return 0; @@ -678,14 +692,14 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3 return 0; } - std::size_t data_offset{}; + s32 data_offset{}; u32 remaining = sample_count; while (remaining > 0) { // Get position in buffer const auto base = send_buffer + (offset * sizeof(u32)); const auto samples_to_grab = std::min(max_samples - offset, remaining); // Write to output - memory.WriteBlock(base, (data + data_offset), samples_to_grab * sizeof(u32)); + memory.WriteBlock(base, (data.data() + data_offset), samples_to_grab * sizeof(u32)); offset = (offset + samples_to_grab) % max_samples; remaining -= samples_to_grab; data_offset += samples_to_grab; @@ -698,7 +712,7 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3 } s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, - s32* out_data, u32 sample_count, u32 read_offset, + std::span out_data, u32 sample_count, u32 read_offset, u32 read_count) { if (max_samples == 0) { return 0; @@ -710,15 +724,16 @@ s32 
CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3 } u32 remaining = sample_count; + s32 data_offset{}; while (remaining > 0) { const auto base = recv_buffer + (offset * sizeof(u32)); const auto samples_to_grab = std::min(max_samples - offset, remaining); std::vector buffer(samples_to_grab); memory.ReadBlock(base, buffer.data(), buffer.size() * sizeof(u32)); - std::memcpy(out_data, buffer.data(), buffer.size() * sizeof(u32)); - out_data += samples_to_grab; + std::memcpy(out_data.data() + data_offset, buffer.data(), buffer.size() * sizeof(u32)); offset = (offset + samples_to_grab) % max_samples; remaining -= samples_to_grab; + data_offset += samples_to_grab; } if (read_count != 0) { @@ -965,8 +980,8 @@ void CommandGenerator::GenerateMixCommand(std::size_t output_offset, std::size_t node_id, input_offset, output_offset, volume); } - auto* output = GetMixBuffer(output_offset); - const auto* input = GetMixBuffer(input_offset); + std::span output = GetMixBuffer(output_offset); + std::span input = GetMixBuffer(input_offset); const s32 gain = static_cast(volume * 32768.0f); // Mix with loop unrolling @@ -1172,12 +1187,14 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s return samples_processed; } -s32* CommandGenerator::GetMixBuffer(std::size_t index) { - return mix_buffer.data() + (index * worker_params.sample_count); +std::span CommandGenerator::GetMixBuffer(std::size_t index) { + return std::span(mix_buffer.data() + (index * worker_params.sample_count), + worker_params.sample_count); } -const s32* CommandGenerator::GetMixBuffer(std::size_t index) const { - return mix_buffer.data() + (index * worker_params.sample_count); +std::span CommandGenerator::GetMixBuffer(std::size_t index) const { + return std::span(mix_buffer.data() + (index * worker_params.sample_count), + worker_params.sample_count); } std::size_t CommandGenerator::GetMixChannelBufferOffset(s32 channel) const { @@ -1188,15 +1205,15 @@ std::size_t 
CommandGenerator::GetTotalMixBufferCount() const { return worker_params.mix_buffer_count + AudioCommon::MAX_CHANNEL_COUNT; } -s32* CommandGenerator::GetChannelMixBuffer(s32 channel) { +std::span CommandGenerator::GetChannelMixBuffer(s32 channel) { return GetMixBuffer(worker_params.mix_buffer_count + channel); } -const s32* CommandGenerator::GetChannelMixBuffer(s32 channel) const { +std::span CommandGenerator::GetChannelMixBuffer(s32 channel) const { return GetMixBuffer(worker_params.mix_buffer_count + channel); } -void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, +void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span output, VoiceState& dsp_state, s32 channel, s32 target_sample_rate, s32 sample_count, s32 node_id) { @@ -1208,7 +1225,7 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o node_id, channel, in_params.sample_format, sample_count, in_params.sample_rate, in_params.mix_id, in_params.splitter_info_id); } - ASSERT_OR_EXECUTE(output != nullptr, { return; }); + ASSERT_OR_EXECUTE(output.data() != nullptr, { return; }); const auto resample_rate = static_cast( static_cast(in_params.sample_rate) / static_cast(target_sample_rate) * @@ -1225,6 +1242,7 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o } std::size_t temp_mix_offset{}; + s32 samples_output{}; auto samples_remaining = sample_count; while (samples_remaining > 0) { const auto samples_to_output = std::min(samples_remaining, min_required_samples); @@ -1328,20 +1346,21 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o if (in_params.behavior_flags.is_pitch_and_src_skipped.Value()) { // No need to resample - std::memcpy(output, sample_buffer.data(), samples_read * sizeof(s32)); + std::memcpy(output.data() + samples_output, sample_buffer.data(), + samples_read * sizeof(s32)); } else { std::fill(sample_buffer.begin() + temp_mix_offset, 
sample_buffer.begin() + temp_mix_offset + (samples_to_read - samples_read), 0); - AudioCore::Resample(output, sample_buffer.data(), resample_rate, dsp_state.fraction, - samples_to_output); + AudioCore::Resample(output.data() + samples_output, sample_buffer.data(), resample_rate, + dsp_state.fraction, samples_to_output); // Resample for (std::size_t i = 0; i < AudioCommon::MAX_SAMPLE_HISTORY; i++) { dsp_state.sample_history[i] = sample_buffer[samples_to_read + i]; } } - output += samples_to_output; samples_remaining -= samples_to_output; + samples_output += samples_to_output; } } diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h index f310d7317..59a33ba76 100755 --- a/src/audio_core/command_generator.h +++ b/src/audio_core/command_generator.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include "audio_core/common.h" #include "audio_core/voice_context.h" #include "common/common_types.h" @@ -41,10 +42,10 @@ public: void PreCommand(); void PostCommand(); - [[nodiscard]] s32* GetChannelMixBuffer(s32 channel); - [[nodiscard]] const s32* GetChannelMixBuffer(s32 channel) const; - [[nodiscard]] s32* GetMixBuffer(std::size_t index); - [[nodiscard]] const s32* GetMixBuffer(std::size_t index) const; + [[nodiscard]] std::span GetChannelMixBuffer(s32 channel); + [[nodiscard]] std::span GetChannelMixBuffer(s32 channel) const; + [[nodiscard]] std::span GetMixBuffer(std::size_t index); + [[nodiscard]] std::span GetMixBuffer(std::size_t index) const; [[nodiscard]] std::size_t GetMixChannelBufferOffset(s32 channel) const; [[nodiscard]] std::size_t GetTotalMixBufferCount() const; @@ -77,10 +78,11 @@ private: void GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* info, bool enabled); [[nodiscard]] ServerSplitterDestinationData* GetDestinationData(s32 splitter_id, s32 index); - s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, const s32* data, - u32 sample_count, u32 write_offset, u32 write_count); - s32 
ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data, - u32 sample_count, u32 read_offset, u32 read_count); + s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, + std::span data, u32 sample_count, u32 write_offset, + u32 write_count); + s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, + std::span out_data, u32 sample_count, u32 read_offset, u32 read_count); void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, std::vector& work_buffer); @@ -91,8 +93,9 @@ private: s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset); s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset, s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset); - void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, VoiceState& dsp_state, - s32 channel, s32 target_sample_rate, s32 sample_count, s32 node_id); + void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span output, + VoiceState& dsp_state, s32 channel, s32 target_sample_rate, + s32 sample_count, s32 node_id); AudioCommon::AudioRendererParameter& worker_params; VoiceContext& voice_context; diff --git a/src/audio_core/sink_context.cpp b/src/audio_core/sink_context.cpp index a69543696..cc55b290c 100755 --- a/src/audio_core/sink_context.cpp +++ b/src/audio_core/sink_context.cpp @@ -15,10 +15,17 @@ std::size_t SinkContext::GetCount() const { void SinkContext::UpdateMainSink(const SinkInfo::InParams& in) { ASSERT(in.type == SinkTypes::Device); - has_downmix_coefs = in.device.down_matrix_enabled; - if (has_downmix_coefs) { + if (in.device.down_matrix_enabled) { downmix_coefficients = in.device.down_matrix_coef; + } else { + downmix_coefficients = { + 1.0f, // front + 0.707f, // center + 0.0f, // lfe + 0.707f, // back + }; } + in_use = in.in_use; use_count = in.device.input_count; buffers = in.device.input; @@ -34,10 +41,6 @@ 
std::vector SinkContext::OutputBuffers() const { return buffer_ret; } -bool SinkContext::HasDownMixingCoefficients() const { - return has_downmix_coefs; -} - const DownmixCoefficients& SinkContext::GetDownmixCoefficients() const { return downmix_coefficients; } diff --git a/src/audio_core/sink_context.h b/src/audio_core/sink_context.h index 9e2b69785..254961fe2 100755 --- a/src/audio_core/sink_context.h +++ b/src/audio_core/sink_context.h @@ -84,7 +84,6 @@ public: [[nodiscard]] bool InUse() const; [[nodiscard]] std::vector OutputBuffers() const; - [[nodiscard]] bool HasDownMixingCoefficients() const; [[nodiscard]] const DownmixCoefficients& GetDownmixCoefficients() const; private: @@ -92,7 +91,6 @@ private: s32 use_count{}; std::array buffers{}; std::size_t sink_count{}; - bool has_downmix_coefs{false}; DownmixCoefficients downmix_coefficients{}; }; } // namespace AudioCore diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index f04538dca..910909201 100755 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -591,7 +591,6 @@ void BufferCache

::CommitAsyncFlushesHigh() { for (auto& interval : intervals) { const std::size_t size = interval.upper() - interval.lower(); const VAddr cpu_addr = interval.lower(); - const VAddr cpu_addr_end = interval.upper(); ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { boost::container::small_vector copies; buffer.ForEachDownloadRange( diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 5d659dcaf..8b45f1b62 100755 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1785,8 +1785,6 @@ public: SSY, SYNC, BRK, - CAL, - RET, DEPBAR, VOTE, VOTE_VTG, @@ -2110,8 +2108,6 @@ private: INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), INST("111000110100----", Id::BRK, Type::Flow, "BRK"), INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), - INST("111000100110----", Id::CAL, Type::Flow, "CAL"), - INST("111000110010----", Id::RET, Type::Flow, "RET"), INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"), diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index 84b8a3243..e8d8d2aa5 100755 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -491,9 +491,6 @@ private: const Registry& registry; const ShaderType stage; - std::shared_ptr context_func; - u32 ast_var_base{}; - std::size_t num_temporaries = 0; std::size_t max_temporaries = 0; @@ -810,33 +807,13 @@ ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const R : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { DefineGlobalMemory(); - context_func = ir.GetMainFunction(); - ast_var_base = 0; - AddLine("TEMP RC;"); AddLine("TEMP FSWZA[4];"); AddLine("TEMP FSWZB[4];"); - InitializeVariables(); - 
AddLine("main:"); - if (context_func->IsDecompiled()) { + if (ir.IsDecompiled()) { DecompileAST(); } else { DecompileBranchMode(); - AddLine("RET;"); - } - - const auto& subfunctions = ir.GetSubFunctions(); - auto it = subfunctions.begin(); - while (it != subfunctions.end()) { - context_func = *it; - AddLine("func_{}:", context_func->GetId()); - if (context_func->IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - AddLine("RET;"); - } - it++; } AddLine("END"); @@ -1083,38 +1060,41 @@ void ARBDecompiler::InitializeVariables() { } void ARBDecompiler::DecompileAST() { - const u32 num_flow_variables = context_func->GetASTNumVariables(); + const u32 num_flow_variables = ir.GetASTNumVariables(); for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("TEMP F{};", i + ast_var_base); + AddLine("TEMP F{};", i); } for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i + ast_var_base); + AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); } - VisitAST(context_func->GetASTProgram()); - ast_var_base += num_flow_variables; + InitializeVariables(); + + VisitAST(ir.GetASTProgram()); } void ARBDecompiler::DecompileBranchMode() { static constexpr u32 FLOW_STACK_SIZE = 20; - if (!context_func->IsFlowStackDisabled()) { + if (!ir.IsFlowStackDisabled()) { AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); AddLine("TEMP SSY_TOP;"); AddLine("TEMP PBK_TOP;"); } - AddLine("TEMP PC{};", context_func->GetId()); + AddLine("TEMP PC;"); - if (!context_func->IsFlowStackDisabled()) { + if (!ir.IsFlowStackDisabled()) { AddLine("MOV.U SSY_TOP.x, 0;"); AddLine("MOV.U PBK_TOP.x, 0;"); } - const auto basic_block_end = context_func->GetBasicBlocks().end(); - auto basic_block_it = context_func->GetBasicBlocks().begin(); + InitializeVariables(); + + const auto basic_block_end = ir.GetBasicBlocks().end(); + auto basic_block_it = ir.GetBasicBlocks().begin(); const u32 first_address = basic_block_it->first; - AddLine("MOV.U 
PC{}.x, {};", context_func->GetId(), first_address); + AddLine("MOV.U PC.x, {};", first_address); AddLine("REP;"); @@ -1123,7 +1103,7 @@ void ARBDecompiler::DecompileBranchMode() { const auto& [address, bb] = *basic_block_it; ++num_blocks; - AddLine("SEQ.S.CC RC.x, PC{}.x, {};", context_func->GetId(), address); + AddLine("SEQ.S.CC RC.x, PC.x, {};", address); AddLine("IF NE.x;"); VisitBlock(bb); @@ -1134,7 +1114,7 @@ void ARBDecompiler::DecompileBranchMode() { const auto op = std::get_if(&*bb[bb.size() - 1]); if (!op || op->GetCode() != OperationCode::Branch) { const u32 next_address = basic_block_it->first; - AddLine("MOV.U PC{}.x, {};", context_func->GetId(), next_address); + AddLine("MOV.U PC.x, {};", next_address); AddLine("CONT;"); } } @@ -1172,8 +1152,7 @@ void ARBDecompiler::VisitAST(const ASTNode& node) { } else if (const auto decoded = std::get_if(&*node->GetInnerData())) { VisitBlock(decoded->nodes); } else if (const auto var_set = std::get_if(&*node->GetInnerData())) { - AddLine("MOV.U F{}, {};", var_set->index + ast_var_base, - VisitExpression(var_set->condition)); + AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition)); ResetTemporaries(); } else if (const auto do_while = std::get_if(&*node->GetInnerData())) { const std::string condition = VisitExpression(do_while->condition); @@ -1193,11 +1172,7 @@ void ARBDecompiler::VisitAST(const ASTNode& node) { ResetTemporaries(); } if (ast_return->kills) { - if (stage == ShaderType::Fragment) { - AddLine("KIL TR;"); - } else { - AddLine("RET;"); - } + AddLine("KIL TR;"); } else { Exit(); } @@ -1244,7 +1219,7 @@ std::string ARBDecompiler::VisitExpression(const Expr& node) { return Visit(ir.GetConditionCode(expr->cc)); } if (const auto expr = std::get_if(&*node)) { - return fmt::format("F{}.x", expr->var_index + ast_var_base); + return fmt::format("F{}.x", expr->var_index); } if (const auto expr = std::get_if(&*node)) { return expr->value ? 
"0xffffffff" : "0"; @@ -1431,11 +1406,6 @@ std::string ARBDecompiler::Visit(const Node& node) { return {}; } - if (const auto func_call = std::get_if(&*node)) { - AddLine("CAL func_{};", func_call->GetFuncId()); - return {}; - } - if ([[maybe_unused]] const auto cmt = std::get_if(&*node)) { // Uncommenting this will generate invalid code. GLASM lacks comments. // AddLine("// {}", cmt->GetText()); @@ -1509,7 +1479,7 @@ std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { } void ARBDecompiler::Exit() { - if (!context_func->IsMain() || stage != ShaderType::Fragment) { + if (stage != ShaderType::Fragment) { AddLine("RET;"); return; } @@ -2051,13 +2021,13 @@ std::string ARBDecompiler::ImageStore(Operation operation) { std::string ARBDecompiler::Branch(Operation operation) { const auto target = std::get(*operation[0]); - AddLine("MOV.U PC{}.x, {};", context_func->GetId(), target.GetValue()); + AddLine("MOV.U PC.x, {};", target.GetValue()); AddLine("CONT;"); return {}; } std::string ARBDecompiler::BranchIndirect(Operation operation) { - AddLine("MOV.U PC{}.x, {};", context_func->GetId(), Visit(operation[0])); + AddLine("MOV.U PC.x, {};", Visit(operation[0])); AddLine("CONT;"); return {}; } @@ -2075,7 +2045,7 @@ std::string ARBDecompiler::PopFlowStack(Operation operation) { const auto stack = std::get(operation.GetMeta()); const std::string_view stack_name = StackName(stack); AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - AddLine("MOV.U PC{}.x, {}[{}_TOP.x].x;", context_func->GetId(), stack_name, stack_name); + AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); AddLine("CONT;"); return {}; } @@ -2086,10 +2056,6 @@ std::string ARBDecompiler::Exit(Operation) { } std::string ARBDecompiler::Discard(Operation) { - if (stage != ShaderType::Fragment) { - AddLine("RET;"); - return {}; - } AddLine("KIL TR;"); return {}; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp 
b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 00bd9baaf..9c28498e8 100755 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -79,11 +79,6 @@ const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); )"; -enum class HelperFunction { - SignedAtomic = 0, - Total, -}; - class ShaderWriter final { public: void AddExpression(std::string_view text) { @@ -439,28 +434,6 @@ public: DeclareInternalFlags(); DeclareCustomVariables(); DeclarePhysicalAttributeReader(); - DeclareHelpersForward(); - - const auto& subfunctions = ir.GetSubFunctions(); - auto it = subfunctions.rbegin(); - while (it != subfunctions.rend()) { - context_func = *it; - code.AddLine("void func_{}() {{", context_func->GetId()); - ++code.scope; - - if (context_func->IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - - --code.scope; - code.AddLine("}}"); - - it++; - } - - context_func = ir.GetMainFunction(); code.AddLine("void main() {{"); ++code.scope; @@ -469,7 +442,7 @@ public: code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); } - if (context_func->IsDecompiled()) { + if (ir.IsDecompiled()) { DecompileAST(); } else { DecompileBranchMode(); @@ -477,9 +450,6 @@ public: --code.scope; code.AddLine("}}"); - - code.AddNewLine(); - DeclareHelpers(); } std::string GetResult() { @@ -492,13 +462,13 @@ private: void DecompileBranchMode() { // VM's program counter - const auto first_address = context_func->GetBasicBlocks().begin()->first; + const auto first_address = ir.GetBasicBlocks().begin()->first; code.AddLine("uint jmp_to = {}U;", first_address); // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems // unlikely that shaders will use 20 nested SSYs and PBKs. 
constexpr u32 FLOW_STACK_SIZE = 20; - if (!context_func->IsFlowStackDisabled()) { + if (!ir.IsFlowStackDisabled()) { for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); @@ -510,7 +480,7 @@ private: code.AddLine("switch (jmp_to) {{"); - for (const auto& pair : context_func->GetBasicBlocks()) { + for (const auto& pair : ir.GetBasicBlocks()) { const auto& [address, bb] = pair; code.AddLine("case 0x{:X}U: {{", address); ++code.scope; @@ -629,7 +599,7 @@ private: size = limit; } - code.AddLine("shared uint {}[{}];", GetSharedMemory(), size / 4); + code.AddLine("shared uint smem[{}];", size / 4); code.AddNewLine(); } code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", @@ -1013,27 +983,6 @@ private: } } - void DeclareHelpersForward() { - code.AddLine("int Helpers_AtomicShared(uint offset, int value, bool is_min);"); - code.AddNewLine(); - } - - void DeclareHelpers() { - if (IsHelperEnabled(HelperFunction::SignedAtomic)) { - code.AddLine( - R"(int Helpers_AtomicShared(uint offset, int value, bool is_min) {{ - uint oldValue, newValue; - do {{ - oldValue = {}[offset]; - newValue = is_min ? 
uint(min(int(oldValue), value)) : uint(max(int(oldValue), value)); - }} while (atomicCompSwap({}[offset], newValue, oldValue) != oldValue); - return int(oldValue); -}})", - GetSharedMemory(), GetSharedMemory()); - code.AddNewLine(); - } - } - void VisitBlock(const NodeBlock& bb) { for (const auto& node : bb) { Visit(node).CheckVoid(); @@ -1160,9 +1109,7 @@ private: } if (const auto smem = std::get_if(&*node)) { - return { - fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), - Type::Uint}; + return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; } if (const auto internal_flag = std::get_if(&*node)) { @@ -1184,11 +1131,6 @@ private: return {}; } - if (const auto func_call = std::get_if(&*node)) { - code.AddLine("func_{}();", func_call->GetFuncId()); - return {}; - } - if (const auto comment = std::get_if(&*node)) { code.AddLine("// " + comment->GetText()); return {}; @@ -1656,9 +1598,7 @@ private: Type::Uint}; } else if (const auto smem = std::get_if(&*dest)) { ASSERT(stage == ShaderType::Compute); - target = { - fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), - Type::Uint}; + target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; } else if (const auto gmem = std::get_if(&*dest)) { const std::string real = Visit(gmem->GetRealAddress()).AsUint(); const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); @@ -2175,14 +2115,7 @@ private: UNIMPLEMENTED_IF(meta->sampler.is_array); const std::size_t count = operation.GetOperandsCount(); - std::string expr = "texelFetch"; - - if (!meta->aoffi.empty()) { - expr += "Offset"; - } - - expr += '('; - + std::string expr = "texelFetch("; expr += GetSampler(meta->sampler); expr += ", "; @@ -2204,20 +2137,6 @@ private: expr += ", "; expr += Visit(meta->lod).AsInt(); } - - if (!meta->aoffi.empty()) { - expr += ", "; - expr += constructors.at(meta->aoffi.size() - 1); - expr += '('; - for (size_t i 
= 0; i < meta->aoffi.size(); ++i) { - if (i > 0) { - expr += ", "; - } - expr += Visit(meta->aoffi[i]).AsInt(); - } - expr += ')'; - } - expr += ')'; expr += GetSwizzle(meta->element); @@ -2264,11 +2183,8 @@ private: template Expression Atomic(Operation operation) { if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) { - // Use a helper as a workaround due to memory being uint - SetHelperEnabled(HelperFunction::SignedAtomic, true); - return {fmt::format("Helpers_AtomicShared({}, {}, {})", Visit(operation[0]).AsInt(), - Visit(operation[1]).AsInt(), opname == Func::Min), - Type::Int}; + UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations"); + return {}; } return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), Visit(operation[1]).AsUint()), @@ -2351,9 +2267,7 @@ private: } Expression Exit(Operation operation) { - if (context_func->IsMain()) { - PreExit(); - } + PreExit(); code.AddLine("return;"); return {}; } @@ -2363,11 +2277,7 @@ private: // about unexecuted instructions that may follow this. 
code.AddLine("if (true) {{"); ++code.scope; - if (stage != ShaderType::Fragment) { - code.AddLine("return;"); - } else { - code.AddLine("discard;"); - } + code.AddLine("discard;"); --code.scope; code.AddLine("}}"); return {}; @@ -2478,7 +2388,7 @@ private: } Expression Barrier(Operation) { - if (!context_func->IsDecompiled()) { + if (!ir.IsDecompiled()) { LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); return {}; } @@ -2795,10 +2705,6 @@ private: } } - constexpr std::string_view GetSharedMemory() const { - return "shared_mem"; - } - std::string GetInternalFlag(InternalFlag flag) const { constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", "overflow_flag"}; @@ -2840,14 +2746,6 @@ private: return std::min(device.GetMaxVaryings(), Maxwell::NumVaryings); } - void SetHelperEnabled(HelperFunction hf, bool enabled) { - helper_functions_enabled[static_cast(hf)] = enabled; - } - - bool IsHelperEnabled(HelperFunction hf) const { - return helper_functions_enabled[static_cast(hf)]; - } - const Device& device; const ShaderIR& ir; const Registry& registry; @@ -2857,13 +2755,9 @@ private: const Header header; std::unordered_map transform_feedback; - std::shared_ptr context_func; - ShaderWriter code; std::optional max_input_vertices; - - std::array(HelperFunction::Total)> helper_functions_enabled{}; }; std::string GetFlowVariable(u32 index) { @@ -3008,15 +2902,9 @@ public: decomp.code.scope++; } if (ast.kills) { - if (decomp.stage != ShaderType::Fragment) { - decomp.code.AddLine("return;"); - } else { - decomp.code.AddLine("discard;"); - } + decomp.code.AddLine("discard;"); } else { - if (decomp.context_func->IsMain()) { - decomp.PreExit(); - } + decomp.PreExit(); decomp.code.AddLine("return;"); } if (!is_true) { @@ -3049,13 +2937,13 @@ private: }; void GLSLDecompiler::DecompileAST() { - const u32 num_flow_variables = context_func->GetASTNumVariables(); + const u32 num_flow_variables = ir.GetASTNumVariables(); for (u32 i = 0; i 
< num_flow_variables; i++) { code.AddLine("bool {} = false;", GetFlowVariable(i)); } ASTDecompiler decompiler{*this}; - decompiler.Visit(context_func->GetASTProgram()); + decompiler.Visit(ir.GetASTProgram()); } } // Anonymous namespace diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 258e2f5df..c6846d886 100755 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -406,38 +406,10 @@ private: binding = DeclareStorageTexels(binding); binding = DeclareImages(binding); - const auto& subfunctions = ir.GetSubFunctions(); - - labels.resize(subfunctions.size() + 1); - other_functions.resize(subfunctions.size()); - - auto it = subfunctions.rbegin(); - while (it != subfunctions.rend()) { - context_func = *it; - other_functions[context_func->GetId() - 1] = - OpFunction(t_void, {}, TypeFunction(t_void)); - AddLabel(); - - if (context_func->IsDecompiled()) { - DeclareFlowVariables(); - DecompileAST(); - } else { - AllocateLabels(); - DecompileBranchMode(); - } - - OpReturn(); - OpFunctionEnd(); - - it++; - } - - context_func = ir.GetMainFunction(); - const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); AddLabel(); - if (context_func->IsDecompiled()) { + if (ir.IsDecompiled()) { DeclareFlowVariables(); DecompileAST(); } else { @@ -469,18 +441,16 @@ private: void DecompileAST(); void DecompileBranchMode() { - const u32 first_address = context_func->GetBasicBlocks().begin()->first; - const u32 func_id = context_func->GetId(); - const std::string func_id_msg = std::to_string(func_id); - const Id loop_label = OpLabel("loop_" + func_id_msg); - const Id merge_label = OpLabel("merge_" + func_id_msg); + const u32 first_address = ir.GetBasicBlocks().begin()->first; + const Id loop_label = OpLabel("loop"); + const Id merge_label = OpLabel("merge"); const Id dummy_label = OpLabel(); const Id jump_label = OpLabel(); - 
continue_label = OpLabel("continue_" + func_id_msg); + continue_label = OpLabel("continue"); std::vector literals; std::vector branch_labels; - for (const auto& [literal, label] : labels[func_id]) { + for (const auto& [literal, label] : labels) { literals.push_back(literal); branch_labels.push_back(label); } @@ -492,11 +462,11 @@ private: std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack(); - Name(jmp_to, "jmp_to_" + func_id_msg); - Name(ssy_flow_stack, "ssy_flow_stack_" + func_id_msg); - Name(ssy_flow_stack_top, "ssy_flow_stack_top_" + func_id_msg); - Name(pbk_flow_stack, "pbk_flow_stack_" + func_id_msg); - Name(pbk_flow_stack_top, "pbk_flow_stack_top_" + func_id_msg); + Name(jmp_to, "jmp_to"); + Name(ssy_flow_stack, "ssy_flow_stack"); + Name(ssy_flow_stack_top, "ssy_flow_stack_top"); + Name(pbk_flow_stack, "pbk_flow_stack"); + Name(pbk_flow_stack_top, "pbk_flow_stack_top"); DefinePrologue(); @@ -514,14 +484,13 @@ private: AddLabel(default_branch); OpReturn(); - for (const auto& [address, bb] : context_func->GetBasicBlocks()) { - AddLabel(labels[func_id].at(address)); + for (const auto& [address, bb] : ir.GetBasicBlocks()) { + AddLabel(labels.at(address)); VisitBasicBlock(bb); - const auto next_it = labels[func_id].lower_bound(address + 1); - const Id next_label = - next_it != labels[func_id].end() ? next_it->second : default_branch; + const auto next_it = labels.lower_bound(address + 1); + const Id next_label = next_it != labels.end() ? 
next_it->second : default_branch; OpBranch(next_label); } @@ -539,10 +508,9 @@ private: static constexpr auto INTERNAL_FLAGS_COUNT = static_cast(InternalFlag::Amount); void AllocateLabels() { - const u32 func_id = context_func->GetId(); - for (const auto& pair : context_func->GetBasicBlocks()) { + for (const auto& pair : ir.GetBasicBlocks()) { const u32 address = pair.first; - labels[func_id].emplace(address, OpLabel(fmt::format("label_0x{:x}", address))); + labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address))); } } @@ -621,14 +589,6 @@ private: DeclareOutputVertex(); } - void SafeKill() { - if (stage != ShaderType::Fragment) { - OpReturn(); - return; - } - OpKill(); - } - void DeclareFragment() { if (stage != ShaderType::Fragment) { return; @@ -696,7 +656,7 @@ private: } void DeclareFlowVariables() { - for (u32 i = 0; i < context_func->GetASTNumVariables(); i++) { + for (u32 i = 0; i < ir.GetASTNumVariables(); i++) { const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); Name(id, fmt::format("flow_var_{}", static_cast(i))); flow_variables.emplace(i, AddGlobalVariable(id)); @@ -1373,12 +1333,6 @@ private: return {}; } - if (const auto func_call = std::get_if(&*node)) { - const u32 func_id = func_call->GetFuncId(); - OpFunctionCall(t_void, other_functions[func_id - 1]); - return {}; - } - if (const auto comment = std::get_if(&*node)) { if (device.HasDebuggingToolAttached()) { // We should insert comments with OpString instead of using named variables @@ -2170,7 +2124,7 @@ private: OpBranchConditional(condition, true_label, discard_label); AddLabel(discard_label); - SafeKill(); + OpKill(); AddLabel(true_label); } @@ -2221,9 +2175,7 @@ private: } Expression Exit(Operation operation) { - if (context_func->IsMain()) { - PreExit(); - } + PreExit(); inside_branch = true; if (conditional_branch_set) { OpReturn(); @@ -2240,12 +2192,12 @@ private: Expression Discard(Operation operation) { inside_branch = true; if (conditional_branch_set) 
{ - SafeKill(); + OpKill(); } else { const Id dummy = OpLabel(); OpBranch(dummy); AddLabel(dummy); - SafeKill(); + OpKill(); AddLabel(); } return {}; @@ -2324,7 +2276,7 @@ private: } Expression Barrier(Operation) { - if (!context_func->IsDecompiled()) { + if (!ir.IsDecompiled()) { LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled"); return {}; } @@ -2818,8 +2770,6 @@ private: const Specialization& specialization; std::unordered_map transform_feedback; - std::shared_ptr context_func; - const Id t_void = Name(TypeVoid(), "void"); const Id t_bool = Name(TypeBool(), "bool"); @@ -2946,8 +2896,7 @@ private: Id ssy_flow_stack{}; Id pbk_flow_stack{}; Id continue_label{}; - std::vector> labels; - std::vector other_functions; + std::map labels; bool conditional_branch_set{}; bool inside_branch{}; @@ -3098,11 +3047,9 @@ public: decomp.OpBranchConditional(condition, then_label, endif_label); decomp.AddLabel(then_label); if (ast.kills) { - decomp.SafeKill(); + decomp.OpKill(); } else { - if (decomp.context_func->IsMain()) { - decomp.PreExit(); - } + decomp.PreExit(); decomp.OpReturn(); } decomp.AddLabel(endif_label); @@ -3111,11 +3058,9 @@ public: decomp.OpBranch(next_block); decomp.AddLabel(next_block); if (ast.kills) { - decomp.SafeKill(); + decomp.OpKill(); } else { - if (decomp.context_func->IsMain()) { - decomp.PreExit(); - } + decomp.PreExit(); decomp.OpReturn(); } decomp.AddLabel(decomp.OpLabel()); @@ -3152,7 +3097,7 @@ private: }; void SPIRVDecompiler::DecompileAST() { - const u32 num_flow_variables = context_func->GetASTNumVariables(); + const u32 num_flow_variables = ir.GetASTNumVariables(); for (u32 i = 0; i < num_flow_variables; i++) { const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); Name(id, fmt::format("flow_var_{}", i)); @@ -3161,7 +3106,7 @@ void SPIRVDecompiler::DecompileAST() { DefinePrologue(); - const ASTNode program = context_func->GetASTProgram(); + const ASTNode program = ir.GetASTProgram(); ASTDecompiler 
decompiler{*this}; decompiler.Visit(program); diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 7c8bd7e2f..43d965f2f 100755 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include "common/assert.h" @@ -27,29 +26,17 @@ using Tegra::Shader::OpCode; constexpr s32 unassigned_branch = -2; -enum class JumpLabel : u32 { - SSYClass = 0, - PBKClass = 1, -}; - -struct JumpItem { - JumpLabel type; - u32 address; - - bool operator==(const JumpItem& other) const { - return std::tie(type, address) == std::tie(other.type, other.address); - } -}; - struct Query { u32 address{}; - std::stack stack{}; + std::stack ssy_stack{}; + std::stack pbk_stack{}; }; struct BlockStack { BlockStack() = default; - explicit BlockStack(const Query& q) : stack{q.stack} {} - std::stack stack{}; + explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} + std::stack ssy_stack{}; + std::stack pbk_stack{}; }; template @@ -78,36 +65,20 @@ struct BlockInfo { } }; -struct ProgramControl { - std::unordered_set found_functions{}; - std::list pending_functions{}; - - void RegisterFunction(u32 address) { - if (found_functions.count(address) != 0) { - return; - } - found_functions.insert(address); - pending_functions.emplace_back(address); - } -}; - struct CFGRebuildState { - explicit CFGRebuildState(ProgramControl& control_, const ProgramCode& program_code_, u32 start_, - u32 base_start_, Registry& registry_) - : control{control_}, program_code{program_code_}, registry{registry_}, start{start_}, - base_start{base_start_} {} + explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_) + : program_code{program_code_}, registry{registry_}, start{start_} {} - ProgramControl& control; const ProgramCode& program_code; Registry& registry; u32 start{}; - u32 base_start{}; std::vector block_info; 
std::list inspect_queries; std::list queries; std::unordered_map registered; std::set labels; - std::map jump_labels; + std::map ssy_labels; + std::map pbk_labels; std::unordered_map stacks; ASTManager* manager{}; }; @@ -182,7 +153,7 @@ template std::optional TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, PackCallable pack) { for (; pos >= state.start; --pos) { - if (IsSchedInstruction(pos, state.base_start)) { + if (IsSchedInstruction(pos, state.start)) { continue; } const Instruction instr = state.program_code[pos]; @@ -291,7 +262,7 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) single_branch.ignore = true; break; } - if (IsSchedInstruction(offset, state.base_start)) { + if (IsSchedInstruction(offset, state.start)) { offset++; continue; } @@ -303,7 +274,6 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) } switch (opcode->get().GetId()) { - case OpCode::Id::RET: case OpCode::Id::EXIT: { const auto pred_index = static_cast(instr.pred.pred_index); single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); @@ -441,20 +411,13 @@ std::pair ParseCode(CFGRebuildState& state, u32 address) case OpCode::Id::SSY: { const u32 target = offset + instr.bra.GetBranchTarget(); insert_label(state, target); - JumpItem it = {JumpLabel::SSYClass, target}; - state.jump_labels.emplace(offset, it); + state.ssy_labels.emplace(offset, target); break; } case OpCode::Id::PBK: { const u32 target = offset + instr.bra.GetBranchTarget(); insert_label(state, target); - JumpItem it = {JumpLabel::PBKClass, target}; - state.jump_labels.emplace(offset, it); - break; - } - case OpCode::Id::CAL: { - const u32 target = offset + instr.bra.GetBranchTarget(); - state.control.RegisterFunction(target); + state.pbk_labels.emplace(offset, target); break; } case OpCode::Id::BRX: { @@ -550,7 +513,7 @@ bool TryInspectAddress(CFGRebuildState& state) { } bool TryQuery(CFGRebuildState& state) { - const auto gather_labels = [](std::stack& 
cc, std::map& labels, + const auto gather_labels = [](std::stack& cc, std::map& labels, BlockInfo& block) { auto gather_start = labels.lower_bound(block.start); const auto gather_end = labels.upper_bound(block.end); @@ -559,19 +522,6 @@ bool TryQuery(CFGRebuildState& state) { ++gather_start; } }; - const auto pop_labels = [](JumpLabel type, SingleBranch* branch, Query& query) -> bool { - while (!query.stack.empty() && query.stack.top().type != type) { - query.stack.pop(); - } - if (query.stack.empty()) { - return false; - } - if (branch->address == unassigned_branch) { - branch->address = query.stack.top().address; - } - query.stack.pop(); - return true; - }; if (state.queries.empty()) { return false; } @@ -584,7 +534,8 @@ bool TryQuery(CFGRebuildState& state) { // consumes a label. Schedule new queries accordingly if (block.visited) { BlockStack& stack = state.stacks[q.address]; - const bool all_okay = (stack.stack.empty() || q.stack == stack.stack); + const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && + (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); state.queries.pop_front(); return all_okay; } @@ -593,7 +544,8 @@ bool TryQuery(CFGRebuildState& state) { Query q2(q); state.queries.pop_front(); - gather_labels(q2.stack, state.jump_labels, block); + gather_labels(q2.ssy_stack, state.ssy_labels, block); + gather_labels(q2.pbk_stack, state.pbk_labels, block); if (std::holds_alternative(*block.branch)) { auto* branch = std::get_if(block.branch.get()); if (!branch->condition.IsUnconditional()) { @@ -603,10 +555,16 @@ bool TryQuery(CFGRebuildState& state) { auto& conditional_query = state.queries.emplace_back(q2); if (branch->is_sync) { - pop_labels(JumpLabel::SSYClass, branch, conditional_query); + if (branch->address == unassigned_branch) { + branch->address = conditional_query.ssy_stack.top(); + } + conditional_query.ssy_stack.pop(); } if (branch->is_brk) { - pop_labels(JumpLabel::PBKClass, branch, conditional_query); + 
if (branch->address == unassigned_branch) { + branch->address = conditional_query.pbk_stack.top(); + } + conditional_query.pbk_stack.pop(); } conditional_query.address = branch->address; return true; @@ -688,23 +646,25 @@ void DecompileShader(CFGRebuildState& state) { state.manager->Decompile(); } -ShaderFunction ScanFunction(ProgramControl& control, const ProgramCode& program_code, - u32 start_address, u32 base_start, const CompilerSettings& settings, - Registry& registry) { - ShaderFunction result_out{}; +} // Anonymous namespace + +std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, + const CompilerSettings& settings, + Registry& registry) { + auto result_out = std::make_unique(); if (settings.depth == CompileDepth::BruteForce) { - result_out.settings.depth = CompileDepth::BruteForce; + result_out->settings.depth = CompileDepth::BruteForce; return result_out; } - CFGRebuildState state{control, program_code, start_address, base_start, registry}; + CFGRebuildState state{program_code, start_address, registry}; // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); state.inspect_queries.push_back(state.start); while (!state.inspect_queries.empty()) { if (!TryInspectAddress(state)) { - result_out.settings.depth = CompileDepth::BruteForce; + result_out->settings.depth = CompileDepth::BruteForce; return result_out; } } @@ -715,7 +675,7 @@ ShaderFunction ScanFunction(ProgramControl& control, const ProgramCode& program_ if (settings.depth != CompileDepth::FlowStack) { // Decompile Stacks - state.queries.push_back(Query{state.start, {}}); + state.queries.push_back(Query{state.start, {}, {}}); decompiled = true; while (!state.queries.empty()) { if (!TryQuery(state)) { @@ -745,18 +705,19 @@ ShaderFunction ScanFunction(ProgramControl& control, const ProgramCode& program_ state.manager->ShowCurrentState("Of Shader"); state.manager->Clear(); } else { - result_out.start = start_address; - result_out.settings.depth 
= settings.depth; - result_out.manager = std::move(manager); - result_out.end = state.block_info.back().end + 1; - return result_out; + auto characteristics = std::make_unique(); + characteristics->start = start_address; + characteristics->settings.depth = settings.depth; + characteristics->manager = std::move(manager); + characteristics->end = state.block_info.back().end + 1; + return characteristics; } } - result_out.start = start_address; - result_out.settings.depth = + result_out->start = start_address; + result_out->settings.depth = use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack; - result_out.blocks.clear(); + result_out->blocks.clear(); for (auto& block : state.block_info) { ShaderBlock new_block{}; new_block.start = block.start; @@ -765,20 +726,20 @@ ShaderFunction ScanFunction(ProgramControl& control, const ProgramCode& program_ if (!new_block.ignore_branch) { new_block.branch = block.branch; } - result_out.end = std::max(result_out.end, block.end); - result_out.blocks.push_back(new_block); + result_out->end = std::max(result_out->end, block.end); + result_out->blocks.push_back(new_block); } if (!use_flow_stack) { - result_out.labels = std::move(state.labels); + result_out->labels = std::move(state.labels); return result_out; } - auto back = result_out.blocks.begin(); + auto back = result_out->blocks.begin(); auto next = std::next(back); - while (next != result_out.blocks.end()) { + while (next != result_out->blocks.end()) { if (!state.labels.contains(next->start) && next->start == back->end + 1) { back->end = next->end; - next = result_out.blocks.erase(next); + next = result_out->blocks.erase(next); continue; } back = next; @@ -787,22 +748,4 @@ ShaderFunction ScanFunction(ProgramControl& control, const ProgramCode& program_ return result_out; } - -} // Anonymous namespace - -std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, Registry& registry) { - ProgramControl control{}; 
- auto result_out = std::make_unique(); - result_out->main = - ScanFunction(control, program_code, start_address, start_address, settings, registry); - while (!control.pending_functions.empty()) { - u32 address = control.pending_functions.front(); - auto fun = ScanFunction(control, program_code, address, start_address, settings, registry); - result_out->subfunctions.emplace(address, std::move(fun)); - control.pending_functions.pop_front(); - } - return result_out; -} - } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 5ef2251b9..37bf96492 100755 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -5,7 +5,6 @@ #pragma once #include -#include #include #include #include @@ -102,7 +101,7 @@ struct ShaderBlock { } }; -struct ShaderFunction { +struct ShaderCharacteristics { std::list blocks{}; std::set labels{}; u32 start{}; @@ -111,12 +110,8 @@ struct ShaderFunction { CompilerSettings settings{}; }; -struct ShaderProgram { - ShaderFunction main; - std::map subfunctions; -}; - -std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, Registry& registry); +std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, + const CompilerSettings& settings, + Registry& registry); } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 355c724a3..6576d1208 100755 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -64,52 +64,9 @@ std::optional TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, } // Anonymous namespace -class ExprDecoder { -public: - explicit ExprDecoder(ShaderIR& ir_) : ir(ir_) {} - - void operator()(const ExprAnd& expr) { - Visit(expr.operand1); - Visit(expr.operand2); - } - - void operator()(const ExprOr& expr) { - Visit(expr.operand1); - Visit(expr.operand2); - } - - void 
operator()(const ExprNot& expr) { - Visit(expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - const auto pred = static_cast(expr.predicate); - if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { - ir.used_predicates.insert(pred); - } - } - - void operator()(const ExprCondCode& expr) {} - - void operator()(const ExprVar& expr) {} - - void operator()(const ExprBoolean& expr) {} - - void operator()(const ExprGprEqual& expr) { - ir.used_registers.insert(expr.gpr); - } - - void Visit(const Expr& node) { - return std::visit(*this, *node); - } - -private: - ShaderIR& ir; -}; - class ASTDecoder { public: - explicit ASTDecoder(ShaderIR& ir_) : ir(ir_), decoder(ir_) {} + explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} void operator()(ASTProgram& ast) { ASTNode current = ast.nodes.GetFirst(); @@ -120,7 +77,6 @@ public: } void operator()(ASTIfThen& ast) { - decoder.Visit(ast.condition); ASTNode current = ast.nodes.GetFirst(); while (current) { Visit(current); @@ -140,18 +96,13 @@ public: void operator()(ASTBlockDecoded& ast) {} - void operator()(ASTVarSet& ast) { - decoder.Visit(ast.condition); - } + void operator()(ASTVarSet& ast) {} void operator()(ASTLabel& ast) {} - void operator()(ASTGoto& ast) { - decoder.Visit(ast.condition); - } + void operator()(ASTGoto& ast) {} void operator()(ASTDoWhile& ast) { - decoder.Visit(ast.condition); ASTNode current = ast.nodes.GetFirst(); while (current) { Visit(current); @@ -159,13 +110,9 @@ public: } } - void operator()(ASTReturn& ast) { - decoder.Visit(ast.condition); - } + void operator()(ASTReturn& ast) {} - void operator()(ASTBreak& ast) { - decoder.Visit(ast.condition); - } + void operator()(ASTBreak& ast) {} void Visit(ASTNode& node) { std::visit(*this, *node->GetInnerData()); @@ -178,113 +125,77 @@ public: private: ShaderIR& ir; - ExprDecoder decoder; }; void ShaderIR::Decode() { - const auto decode_function = ([this](ShaderFunction& shader_info) { - coverage_end = std::max(0, shader_info.end); - 
switch (shader_info.settings.depth) { - case CompileDepth::FlowStack: { - for (const auto& block : shader_info.blocks) { - basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); - } - break; - } - case CompileDepth::NoFlowStack: { - disable_flow_stack = true; - const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == static_cast(exit_branch)) { - return; - } - basic_blocks.insert({label, nodes}); - }; - const auto& blocks = shader_info.blocks; - NodeBlock current_block; - u32 current_label = static_cast(exit_branch); - for (const auto& block : blocks) { - if (shader_info.labels.contains(block.start)) { - insert_block(current_block, current_label); - current_block.clear(); - current_label = block.start; - } - if (!block.ignore_branch) { - DecodeRangeInner(current_block, block.start, block.end); - InsertControlFlow(current_block, block); - } else { - DecodeRangeInner(current_block, block.start, block.end + 1); - } - } - insert_block(current_block, current_label); - break; - } - case CompileDepth::DecompileBackwards: - case CompileDepth::FullDecompile: { - program_manager = std::move(shader_info.manager); - disable_flow_stack = true; - decompiled = true; - ASTDecoder decoder{*this}; - ASTNode program = program_manager.GetProgram(); - decoder.Visit(program); - break; - } - default: - LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); - [[fallthrough]]; - case CompileDepth::BruteForce: { - const auto shader_end = static_cast(program_code.size()); - coverage_begin = main_offset; - coverage_end = shader_end; - for (u32 label = main_offset; label < shader_end; ++label) { - basic_blocks.insert({label, DecodeRange(label, label + 1)}); - } - break; - } - } - if (settings.depth != shader_info.settings.depth) { - LOG_WARNING( - HW_GPU, - "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", - CompileDepthAsString(settings.depth), - CompileDepthAsString(shader_info.settings.depth)); - } - }); - const auto 
gen_function = - ([this](ShaderFunction& shader_info, u32 id) -> std::shared_ptr { - std::shared_ptr result; - if (decompiled) { - result = std::make_shared(std::move(program_manager), id, - shader_info.start, shader_info.end); - } else { - result = - std::make_shared(std::move(basic_blocks), disable_flow_stack, - id, shader_info.start, shader_info.end); - } - decompiled = false; - disable_flow_stack = false; - basic_blocks.clear(); - program_manager.Clear(); - return result; - }); std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); decompiled = false; auto info = ScanFlow(program_code, main_offset, settings, registry); - u32 id_start = 1; - for (auto& pair : info->subfunctions) { - func_map.emplace(pair.first, id_start); - id_start++; + auto& shader_info = *info; + coverage_begin = shader_info.start; + coverage_end = shader_info.end; + switch (shader_info.settings.depth) { + case CompileDepth::FlowStack: { + for (const auto& block : shader_info.blocks) { + basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); + } + break; } - coverage_begin = info->main.start; - coverage_end = 0; - decode_function(info->main); - main_function = gen_function(info->main, 0); - subfunctions.resize(info->subfunctions.size()); - for (auto& pair : info->subfunctions) { - auto& func_info = pair.second; - decode_function(func_info); - u32 id = func_map[pair.first]; - subfunctions[id - 1] = gen_function(func_info, id); + case CompileDepth::NoFlowStack: { + disable_flow_stack = true; + const auto insert_block = [this](NodeBlock& nodes, u32 label) { + if (label == static_cast(exit_branch)) { + return; + } + basic_blocks.insert({label, nodes}); + }; + const auto& blocks = shader_info.blocks; + NodeBlock current_block; + u32 current_label = static_cast(exit_branch); + for (const auto& block : blocks) { + if (shader_info.labels.contains(block.start)) { + insert_block(current_block, current_label); + current_block.clear(); + current_label = 
block.start; + } + if (!block.ignore_branch) { + DecodeRangeInner(current_block, block.start, block.end); + InsertControlFlow(current_block, block); + } else { + DecodeRangeInner(current_block, block.start, block.end + 1); + } + } + insert_block(current_block, current_label); + break; + } + case CompileDepth::DecompileBackwards: + case CompileDepth::FullDecompile: { + program_manager = std::move(shader_info.manager); + disable_flow_stack = true; + decompiled = true; + ASTDecoder decoder{*this}; + ASTNode program = GetASTProgram(); + decoder.Visit(program); + break; + } + default: + LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); + [[fallthrough]]; + case CompileDepth::BruteForce: { + const auto shader_end = static_cast(program_code.size()); + coverage_begin = main_offset; + coverage_end = shader_end; + for (u32 label = main_offset; label < shader_end; ++label) { + basic_blocks.insert({label, DecodeRange(label, label + 1)}); + } + break; + } + } + if (settings.depth != shader_info.settings.depth) { + LOG_WARNING( + HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", + CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); } } diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 2bc596512..5f88537bc 100755 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -33,7 +33,6 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { // With the previous preconditions, this instruction is a no-operation. 
break; } - case OpCode::Id::RET: case OpCode::Id::EXIT: { const ConditionCode cc = instr.flow_condition_code; UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); @@ -313,16 +312,6 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); break; } - case OpCode::Id::CAL: { - const u32 target = pc + instr.bra.GetBranchTarget(); - const auto it = func_map.find(target); - if (it == func_map.end()) { - UNREACHABLE(); - break; - } - bb.push_back(FunctionCall(it->second)); - break; - } default: UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); } diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 43a166b6f..c69681e8d 100755 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -339,6 +339,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const TextureType texture_type{instr.tlds.GetTextureType()}; const bool is_array{instr.tlds.IsArrayTexture()}; + UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); const Node4 components = GetTldsCode(instr, texture_type, is_array); @@ -820,7 +822,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is for (std::size_t i = 0; i < type_coord_count; ++i) { const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); coords.push_back( - GetRegister(last && !aoffi_enabled ? last_coord_register : (coord_register + i))); + GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); } const Node array = is_array ? 
GetRegister(array_register) : nullptr; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index a58e7c65e..b54d33763 100755 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -267,11 +267,10 @@ class PatchNode; class SmemNode; class GmemNode; class CommentNode; -class FunctionCallNode; using NodeData = std::variant; + LmemNode, SmemNode, GmemNode, CommentNode>; using Node = std::shared_ptr; using Node4 = std::array; using NodeBlock = std::vector; @@ -495,18 +494,6 @@ private: std::vector code; ///< Code to execute }; -class FunctionCallNode final : public AmendNode { -public: - explicit FunctionCallNode(u32 func_id_) : func_id{func_id_} {} - - [[nodiscard]] u32 GetFuncId() const { - return func_id; - } - -private: - u32 func_id; ///< Id of the function to call -}; - /// A general purpose register class GprNode final { public: diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index cef9c26bc..6a5b6940d 100755 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp @@ -19,11 +19,6 @@ Node Comment(std::string text) { return MakeNode(std::move(text)); } -/// Creates a function call -Node FunctionCall(u32 func_id) { - return MakeNode(func_id); -} - Node Immediate(u32 value) { return MakeNode(value); } diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h index 3f882cd25..1e0886185 100755 --- a/src/video_core/shader/node_helper.h +++ b/src/video_core/shader/node_helper.h @@ -27,9 +27,6 @@ Node Conditional(Node condition, std::vector code); /// Creates a commentary node Node Comment(std::string text); -/// Creates a function call -Node FunctionCall(u32 func_id); - /// Creates an u32 immediate Node Immediate(u32 value); diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 94715b069..1cd7c14d7 100755 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -26,7 
+26,7 @@ namespace VideoCommon::Shader { struct ShaderBlock; -constexpr u32 MAX_PROGRAM_LENGTH = 0x2000; +constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; struct ConstBuffer { constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) @@ -64,68 +64,16 @@ struct GlobalMemoryUsage { bool is_written{}; }; -class ShaderFunctionIR final { -public: - explicit ShaderFunctionIR(std::map&& basic_blocks_, bool disable_flow_stack_, - u32 id_, u32 coverage_begin_, u32 coverage_end_) - : basic_blocks{std::move(basic_blocks_)}, decompiled{false}, - disable_flow_stack{disable_flow_stack_}, id{id_}, coverage_begin{coverage_begin_}, - coverage_end{coverage_end_} {} - explicit ShaderFunctionIR(ASTManager&& program_manager_, u32 id_, u32 coverage_begin_, - u32 coverage_end_) - : program_manager{std::move(program_manager_)}, decompiled{true}, disable_flow_stack{true}, - id{id_}, coverage_begin{coverage_begin_}, coverage_end{coverage_end_} {} - - const std::map& GetBasicBlocks() const { - return basic_blocks; - } - - [[nodiscard]] bool IsFlowStackDisabled() const { - return disable_flow_stack; - } - - [[nodiscard]] bool IsDecompiled() const { - return decompiled; - } - - const ASTManager& GetASTManager() const { - return program_manager; - } - - [[nodiscard]] ASTNode GetASTProgram() const { - return program_manager.GetProgram(); - } - - [[nodiscard]] u32 GetASTNumVariables() const { - return program_manager.GetVariables(); - } - - [[nodiscard]] bool IsMain() const { - return id == 0; - } - - [[nodiscard]] u32 GetId() const { - return id; - } - -private: - std::map basic_blocks; - ASTManager program_manager{true, true}; - - bool decompiled{}; - bool disable_flow_stack{}; - u32 id{}; - - u32 coverage_begin{}; - u32 coverage_end{}; -}; - class ShaderIR final { public: explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, Registry& registry_); ~ShaderIR(); + const std::map& GetBasicBlocks() const { + return basic_blocks; + } + const std::set& 
GetRegisters() const { return used_registers; } @@ -207,6 +155,26 @@ public: return header; } + bool IsFlowStackDisabled() const { + return disable_flow_stack; + } + + bool IsDecompiled() const { + return decompiled; + } + + const ASTManager& GetASTManager() const { + return program_manager; + } + + ASTNode GetASTProgram() const { + return program_manager.GetProgram(); + } + + u32 GetASTNumVariables() const { + return program_manager.GetVariables(); + } + u32 ConvertAddressToNvidiaSpace(u32 address) const { return (address - main_offset) * static_cast(sizeof(Tegra::Shader::Instruction)); } @@ -222,16 +190,7 @@ public: return num_custom_variables; } - std::shared_ptr GetMainFunction() const { - return main_function; - } - - const std::vector>& GetSubFunctions() const { - return subfunctions; - } - private: - friend class ExprDecoder; friend class ASTDecoder; struct SamplerInfo { @@ -494,10 +453,6 @@ private: std::vector amend_code; u32 num_custom_variables{}; - std::shared_ptr main_function; - std::vector> subfunctions; - std::unordered_map func_map; - std::set used_registers; std::set used_predicates; std::set used_input_attributes;