From 989787a69ab34e9a23b9f651d7302c4d3f87ae9f Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Mon, 4 Dec 2023 03:26:49 +0100 Subject: [PATCH] early-access version 4005 --- README.md | 2 +- src/core/file_sys/fsmitm_romfsbuild.cpp | 130 +++++++++--------- src/core/file_sys/fsmitm_romfsbuild.h | 6 +- src/core/file_sys/romfs.cpp | 85 ++++++++---- src/core/file_sys/vfs_concat.cpp | 4 +- src/core/file_sys/vfs_concat.h | 2 +- src/core/file_sys/vfs_layered.cpp | 21 +-- .../glasm/emit_glasm_context_get_set.cpp | 10 +- .../backend/glasm/glasm_emit_context.cpp | 6 + .../backend/glsl/emit_glsl.h | 5 +- .../glsl/emit_glsl_context_get_set.cpp | 9 ++ .../backend/glsl/glsl_emit_context.cpp | 11 +- .../backend/spirv/emit_spirv.h | 6 + .../spirv/emit_spirv_context_get_set.cpp | 29 ++-- .../backend/spirv/spirv_emit_context.cpp | 6 + src/shader_recompiler/runtime_info.h | 11 +- .../renderer_opengl/gl_shader_cache.cpp | 20 +-- .../renderer_vulkan/vk_pipeline_cache.cpp | 13 +- src/video_core/vulkan_common/vulkan_device.h | 5 + 19 files changed, 234 insertions(+), 147 deletions(-) diff --git a/README.md b/README.md index 62243025b..b9f73a831 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 4004. +This is the source code for early-access 4005. ## Legal Notice diff --git a/src/core/file_sys/fsmitm_romfsbuild.cpp b/src/core/file_sys/fsmitm_romfsbuild.cpp index 58c8bfaf0..de4dc5ed7 100755 --- a/src/core/file_sys/fsmitm_romfsbuild.cpp +++ b/src/core/file_sys/fsmitm_romfsbuild.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include "common/alignment.h" #include "common/assert.h" @@ -134,7 +135,7 @@ void RomFSBuildContext::VisitDirectory(VirtualDir romfs_dir, VirtualDir ext_dir, child->size = child->source->GetSize(); - AddFile(parent, child); + AddFile(parent, std::move(child)); } for (auto& child_romfs_dir : romfs_dir->GetSubdirectories()) { @@ -163,36 +164,24 @@ void RomFSBuildContext::VisitDirectory(VirtualDir romfs_dir, VirtualDir ext_dir, bool RomFSBuildContext::AddDirectory(std::shared_ptr parent_dir_ctx, std::shared_ptr dir_ctx) { - // Check whether it's already in the known directories. - const auto [it, is_new] = directories.emplace(dir_ctx->path, nullptr); - if (!is_new) { - return false; - } - // Add a new directory. num_dirs++; dir_table_size += sizeof(RomFSDirectoryEntry) + Common::AlignUp(dir_ctx->path_len - dir_ctx->cur_path_ofs, 4); - dir_ctx->parent = parent_dir_ctx; - it->second = dir_ctx; + dir_ctx->parent = std::move(parent_dir_ctx); + directories.emplace_back(std::move(dir_ctx)); return true; } bool RomFSBuildContext::AddFile(std::shared_ptr parent_dir_ctx, std::shared_ptr file_ctx) { - // Check whether it's already in the known files. - const auto [it, is_new] = files.emplace(file_ctx->path, nullptr); - if (!is_new) { - return false; - } - // Add a new file. num_files++; file_table_size += sizeof(RomFSFileEntry) + Common::AlignUp(file_ctx->path_len - file_ctx->cur_path_ofs, 4); - file_ctx->parent = parent_dir_ctx; - it->second = file_ctx; + file_ctx->parent = std::move(parent_dir_ctx); + files.emplace_back(std::move(file_ctx)); return true; } @@ -201,7 +190,7 @@ RomFSBuildContext::RomFSBuildContext(VirtualDir base_, VirtualDir ext_) : base(std::move(base_)), ext(std::move(ext_)) { root = std::make_shared(); root->path = "\0"; - directories.emplace(root->path, root); + directories.emplace_back(root); num_dirs = 1; dir_table_size = 0x18; @@ -210,28 +199,43 @@ RomFSBuildContext::RomFSBuildContext(VirtualDir base_, VirtualDir ext_) RomFSBuildContext::~RomFSBuildContext() = default; -std::multimap RomFSBuildContext::Build() { +std::vector> RomFSBuildContext::Build() { const u64 dir_hash_table_entry_count = romfs_get_hash_table_count(num_dirs); const u64 file_hash_table_entry_count = romfs_get_hash_table_count(num_files); dir_hash_table_size = 4 * dir_hash_table_entry_count; file_hash_table_size = 4 * file_hash_table_entry_count; - // Assign metadata pointers + // Assign metadata pointers. RomFSHeader header{}; - std::vector dir_hash_table(dir_hash_table_entry_count, ROMFS_ENTRY_EMPTY); - std::vector file_hash_table(file_hash_table_entry_count, ROMFS_ENTRY_EMPTY); + std::vector metadata(file_hash_table_size + file_table_size + dir_hash_table_size + + dir_table_size); + u32* const dir_hash_table_pointer = reinterpret_cast(metadata.data()); + u8* const dir_table_pointer = metadata.data() + dir_hash_table_size; + u32* const file_hash_table_pointer = + reinterpret_cast(metadata.data() + dir_hash_table_size + dir_table_size); + u8* const file_table_pointer = + metadata.data() + dir_hash_table_size + dir_table_size + file_hash_table_size; - std::vector dir_table(dir_table_size); - std::vector file_table(file_table_size); + std::span dir_hash_table(dir_hash_table_pointer, dir_hash_table_entry_count); + std::span file_hash_table(file_hash_table_pointer, file_hash_table_entry_count); + std::span dir_table(dir_table_pointer, dir_table_size); + std::span file_table(file_table_pointer, file_table_size); - std::shared_ptr cur_file; + // Initialize hash tables. + std::memset(dir_hash_table.data(), 0xFF, dir_hash_table.size_bytes()); + std::memset(file_hash_table.data(), 0xFF, file_hash_table.size_bytes()); + + // Sort tables by name. + std::sort(files.begin(), files.end(), + [](const auto& a, const auto& b) { return a->path < b->path; }); + std::sort(directories.begin(), directories.end(), + [](const auto& a, const auto& b) { return a->path < b->path; }); // Determine file offsets. u32 entry_offset = 0; std::shared_ptr prev_file = nullptr; - for (const auto& it : files) { - cur_file = it.second; + for (const auto& cur_file : files) { file_partition_size = Common::AlignUp(file_partition_size, 16); cur_file->offset = file_partition_size; file_partition_size += cur_file->size; @@ -243,34 +247,48 @@ std::multimap RomFSBuildContext::Build() { } // Assign deferred parent/sibling ownership. for (auto it = files.rbegin(); it != files.rend(); ++it) { - cur_file = it->second; + auto& cur_file = *it; cur_file->sibling = cur_file->parent->file; cur_file->parent->file = cur_file; } - std::shared_ptr cur_dir; - // Determine directory offsets. entry_offset = 0; - for (const auto& it : directories) { - cur_dir = it.second; + for (const auto& cur_dir : directories) { cur_dir->entry_offset = entry_offset; entry_offset += static_cast(sizeof(RomFSDirectoryEntry) + Common::AlignUp(cur_dir->path_len - cur_dir->cur_path_ofs, 4)); } // Assign deferred parent/sibling ownership. - for (auto it = directories.rbegin(); it->second != root; ++it) { - cur_dir = it->second; + for (auto it = directories.rbegin(); (*it) != root; ++it) { + auto& cur_dir = *it; cur_dir->sibling = cur_dir->parent->child; cur_dir->parent->child = cur_dir; } - std::multimap out; + // Create output map. + std::vector> out; + out.reserve(num_files + 2); + + // Set header fields. + header.header_size = sizeof(RomFSHeader); + header.file_hash_table_size = file_hash_table_size; + header.file_table_size = file_table_size; + header.dir_hash_table_size = dir_hash_table_size; + header.dir_table_size = dir_table_size; + header.file_partition_ofs = ROMFS_FILEPARTITION_OFS; + header.dir_hash_table_ofs = Common::AlignUp(header.file_partition_ofs + file_partition_size, 4); + header.dir_table_ofs = header.dir_hash_table_ofs + header.dir_hash_table_size; + header.file_hash_table_ofs = header.dir_table_ofs + header.dir_table_size; + header.file_table_ofs = header.file_hash_table_ofs + header.file_hash_table_size; + + std::vector header_data(sizeof(RomFSHeader)); + std::memcpy(header_data.data(), &header, header_data.size()); + out.emplace_back(0, std::make_shared(std::move(header_data))); // Populate file tables. - for (const auto& it : files) { - cur_file = it.second; + for (const auto& cur_file : files) { RomFSFileEntry cur_entry{}; cur_entry.parent = cur_file->parent->entry_offset; @@ -287,7 +305,7 @@ std::multimap RomFSBuildContext::Build() { cur_entry.name_size = name_size; - out.emplace(cur_file->offset + ROMFS_FILEPARTITION_OFS, std::move(cur_file->source)); + out.emplace_back(cur_file->offset + ROMFS_FILEPARTITION_OFS, std::move(cur_file->source)); std::memcpy(file_table.data() + cur_file->entry_offset, &cur_entry, sizeof(RomFSFileEntry)); std::memset(file_table.data() + cur_file->entry_offset + sizeof(RomFSFileEntry), 0, Common::AlignUp(cur_entry.name_size, 4)); @@ -296,8 +314,7 @@ std::multimap RomFSBuildContext::Build() { } // Populate dir tables. - for (const auto& it : directories) { - cur_dir = it.second; + for (const auto& cur_dir : directories) { RomFSDirectoryEntry cur_entry{}; cur_entry.parent = cur_dir == root ? 0 : cur_dir->parent->entry_offset; @@ -323,34 +340,13 @@ std::multimap RomFSBuildContext::Build() { cur_dir->path.data() + cur_dir->cur_path_ofs, name_size); } - // Set header fields. - header.header_size = sizeof(RomFSHeader); - header.file_hash_table_size = file_hash_table_size; - header.file_table_size = file_table_size; - header.dir_hash_table_size = dir_hash_table_size; - header.dir_table_size = dir_table_size; - header.file_partition_ofs = ROMFS_FILEPARTITION_OFS; - header.dir_hash_table_ofs = Common::AlignUp(header.file_partition_ofs + file_partition_size, 4); - header.dir_table_ofs = header.dir_hash_table_ofs + header.dir_hash_table_size; - header.file_hash_table_ofs = header.dir_table_ofs + header.dir_table_size; - header.file_table_ofs = header.file_hash_table_ofs + header.file_hash_table_size; + // Write metadata. + out.emplace_back(header.dir_hash_table_ofs, + std::make_shared(std::move(metadata))); - std::vector header_data(sizeof(RomFSHeader)); - std::memcpy(header_data.data(), &header, header_data.size()); - out.emplace(0, std::make_shared(std::move(header_data))); - - std::vector metadata(file_hash_table_size + file_table_size + dir_hash_table_size + - dir_table_size); - std::size_t index = 0; - std::memcpy(metadata.data(), dir_hash_table.data(), dir_hash_table.size() * sizeof(u32)); - index += dir_hash_table.size() * sizeof(u32); - std::memcpy(metadata.data() + index, dir_table.data(), dir_table.size()); - index += dir_table.size(); - std::memcpy(metadata.data() + index, file_hash_table.data(), - file_hash_table.size() * sizeof(u32)); - index += file_hash_table.size() * sizeof(u32); - std::memcpy(metadata.data() + index, file_table.data(), file_table.size()); - out.emplace(header.dir_hash_table_ofs, std::make_shared(std::move(metadata))); + // Sort the output. + std::sort(out.begin(), out.end(), + [](const auto& a, const auto& b) { return a.first < b.first; }); return out; } diff --git a/src/core/file_sys/fsmitm_romfsbuild.h b/src/core/file_sys/fsmitm_romfsbuild.h index e63340992..b69bee23a 100755 --- a/src/core/file_sys/fsmitm_romfsbuild.h +++ b/src/core/file_sys/fsmitm_romfsbuild.h @@ -22,14 +22,14 @@ public: ~RomFSBuildContext(); // This finalizes the context. - std::multimap Build(); + std::vector> Build(); private: VirtualDir base; VirtualDir ext; std::shared_ptr root; - std::map, std::less<>> directories; - std::map, std::less<>> files; + std::vector> directories; + std::vector> files; u64 num_dirs = 0; u64 num_files = 0; u64 dir_table_size = 0; diff --git a/src/core/file_sys/romfs.cpp b/src/core/file_sys/romfs.cpp index 8b0e65c53..cc3d540f5 100755 --- a/src/core/file_sys/romfs.cpp +++ b/src/core/file_sys/romfs.cpp @@ -55,44 +55,68 @@ struct FileEntry { }; static_assert(sizeof(FileEntry) == 0x20, "FileEntry has incorrect size."); -template -std::pair GetEntry(const VirtualFile& file, std::size_t offset) { - Entry entry{}; - if (file->ReadObject(&entry, offset) != sizeof(Entry)) +struct RomFSTraversalContext { + RomFSHeader header; + VirtualFile file; + std::vector directory_meta; + std::vector file_meta; +}; + +template +std::pair GetEntry(const RomFSTraversalContext& ctx, size_t offset) { + const size_t entry_end = offset + sizeof(EntryType); + const std::vector& vec = ctx.*Member; + const size_t size = vec.size(); + const u8* data = vec.data(); + EntryType entry{}; + + if (entry_end > size) { return {}; - std::string string(entry.name_length, '\0'); - if (file->ReadArray(&string[0], string.size(), offset + sizeof(Entry)) != string.size()) - return {}; - return {entry, string}; + } + std::memcpy(&entry, data + offset, sizeof(EntryType)); + + const size_t name_length = std::min(entry_end + entry.name_length, size) - entry_end; + std::string name(reinterpret_cast(data + entry_end), name_length); + + return {entry, std::move(name)}; } -void ProcessFile(const VirtualFile& file, std::size_t file_offset, std::size_t data_offset, - u32 this_file_offset, std::shared_ptr& parent) { - while (this_file_offset != ROMFS_ENTRY_EMPTY) { - auto entry = GetEntry(file, file_offset + this_file_offset); +std::pair GetDirectoryEntry(const RomFSTraversalContext& ctx, + size_t directory_offset) { + return GetEntry(ctx, directory_offset); +} - parent->AddFile(std::make_shared( - file, entry.first.size, entry.first.offset + data_offset, entry.second)); +std::pair GetFileEntry(const RomFSTraversalContext& ctx, + size_t file_offset) { + return GetEntry(ctx, file_offset); +} + +void ProcessFile(const RomFSTraversalContext& ctx, u32 this_file_offset, + std::shared_ptr& parent) { + while (this_file_offset != ROMFS_ENTRY_EMPTY) { + auto entry = GetFileEntry(ctx, this_file_offset); + + parent->AddFile(std::make_shared(ctx.file, entry.first.size, + entry.first.offset + ctx.header.data_offset, + std::move(entry.second))); this_file_offset = entry.first.sibling; } } -void ProcessDirectory(const VirtualFile& file, std::size_t dir_offset, std::size_t file_offset, - std::size_t data_offset, u32 this_dir_offset, +void ProcessDirectory(const RomFSTraversalContext& ctx, u32 this_dir_offset, std::shared_ptr& parent) { while (this_dir_offset != ROMFS_ENTRY_EMPTY) { - auto entry = GetEntry(file, dir_offset + this_dir_offset); + auto entry = GetDirectoryEntry(ctx, this_dir_offset); auto current = std::make_shared( std::vector{}, std::vector{}, entry.second); if (entry.first.child_file != ROMFS_ENTRY_EMPTY) { - ProcessFile(file, file_offset, data_offset, entry.first.child_file, current); + ProcessFile(ctx, entry.first.child_file, current); } if (entry.first.child_dir != ROMFS_ENTRY_EMPTY) { - ProcessDirectory(file, dir_offset, file_offset, data_offset, entry.first.child_dir, - current); + ProcessDirectory(ctx, entry.first.child_dir, current); } parent->AddDirectory(current); @@ -107,22 +131,25 @@ VirtualDir ExtractRomFS(VirtualFile file) { return root_container; } - RomFSHeader header{}; - if (file->ReadObject(&header) != sizeof(RomFSHeader)) { - return root_container; + RomFSTraversalContext ctx{}; + + if (file->ReadObject(&ctx.header) != sizeof(RomFSHeader)) { + return nullptr; } - if (header.header_size != sizeof(RomFSHeader)) { - return root_container; + if (ctx.header.header_size != sizeof(RomFSHeader)) { + return nullptr; } - const u64 file_offset = header.file_meta.offset; - const u64 dir_offset = header.directory_meta.offset; + ctx.file = file; + ctx.directory_meta = + file->ReadBytes(ctx.header.directory_meta.size, ctx.header.directory_meta.offset); + ctx.file_meta = file->ReadBytes(ctx.header.file_meta.size, ctx.header.file_meta.offset); - ProcessDirectory(file, dir_offset, file_offset, header.data_offset, 0, root_container); + ProcessDirectory(ctx, 0, root_container); if (auto root = root_container->GetSubdirectory(""); root) { - return std::make_shared(std::move(root)); + return root; } ASSERT(false); diff --git a/src/core/file_sys/vfs_concat.cpp b/src/core/file_sys/vfs_concat.cpp index c6141b230..815acd5ce 100755 --- a/src/core/file_sys/vfs_concat.cpp +++ b/src/core/file_sys/vfs_concat.cpp @@ -59,8 +59,8 @@ VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(std::string&& name, return VirtualFile(new ConcatenatedVfsFile(std::move(name), std::move(concatenation_map))); } -VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(u8 filler_byte, std::string&& name, - std::multimap&& files) { +VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile( + u8 filler_byte, std::string&& name, std::vector>&& files) { // Fold trivial cases. if (files.empty()) { return nullptr; diff --git a/src/core/file_sys/vfs_concat.h b/src/core/file_sys/vfs_concat.h index 9b7a45719..6e484bea3 100755 --- a/src/core/file_sys/vfs_concat.h +++ b/src/core/file_sys/vfs_concat.h @@ -37,7 +37,7 @@ public: /// Convenience function that turns a map of offsets to files into a concatenated file, filling /// gaps with a given filler byte. static VirtualFile MakeConcatenatedFile(u8 filler_byte, std::string&& name, - std::multimap&& files); + std::vector>&& files); std::string GetName() const override; std::size_t GetSize() const override; diff --git a/src/core/file_sys/vfs_layered.cpp b/src/core/file_sys/vfs_layered.cpp index fed159f62..2327e2007 100755 --- a/src/core/file_sys/vfs_layered.cpp +++ b/src/core/file_sys/vfs_layered.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include "core/file_sys/vfs_layered.h" @@ -59,13 +60,12 @@ std::string LayeredVfsDirectory::GetFullPath() const { std::vector LayeredVfsDirectory::GetFiles() const { std::vector out; - std::set> out_names; + std::unordered_set out_names; for (const auto& layer : dirs) { for (auto& file : layer->GetFiles()) { - auto file_name = file->GetName(); - if (!out_names.contains(file_name)) { - out_names.emplace(std::move(file_name)); + const auto [it, is_new] = out_names.emplace(file->GetName()); + if (is_new) { out.emplace_back(std::move(file)); } } @@ -75,18 +75,19 @@ std::vector LayeredVfsDirectory::GetFiles() const { } std::vector LayeredVfsDirectory::GetSubdirectories() const { - std::vector names; + std::vector out; + std::unordered_set out_names; + for (const auto& layer : dirs) { for (const auto& sd : layer->GetSubdirectories()) { - if (std::find(names.begin(), names.end(), sd->GetName()) == names.end()) - names.push_back(sd->GetName()); + out_names.emplace(sd->GetName()); } } - std::vector out; - out.reserve(names.size()); - for (const auto& subdir : names) + out.reserve(out_names.size()); + for (const auto& subdir : out_names) { out.emplace_back(GetSubdirectory(subdir)); + } return out; } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index f0faf786f..b516a94b2 100755 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -7,6 +7,7 @@ #include "shader_recompiler/backend/glasm/glasm_emit_context.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/shader_info.h" namespace Shader::Backend::GLASM { @@ -23,7 +24,14 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU } if (binding.IsImmediate()) { - ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset); + const u32 binding_index{binding.U32()}; + const u32 max_num_cbufs{ctx.runtime_info.max_num_cbufs}; + if (binding_index >= max_num_cbufs) { + // cbuf index exceeds device limit + ctx.Add("MOV.S {},0;", ret); + return; + } + ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding_index, offset); return; } diff --git a/src/shader_recompiler/backend/glasm/glasm_emit_context.cpp b/src/shader_recompiler/backend/glasm/glasm_emit_context.cpp index 1b7207acf..f654feaf4 100755 --- a/src/shader_recompiler/backend/glasm/glasm_emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/glasm_emit_context.cpp @@ -37,6 +37,12 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (desc.count != 1) { throw NotImplementedException("Constant buffer descriptor array"); } + if (cbuf_index >= runtime_info.max_num_cbufs) { + LOG_WARNING(Shader_GLASM, "Constant buffer binding index {} exceeds device limit of {}", + cbuf_index, runtime_info.max_num_cbufs); + ++cbuf_index; + continue; + } Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); ++cbuf_index; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h index 74ccc82ae..62913166c 100755 --- a/src/shader_recompiler/backend/glsl/emit_glsl.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl.h @@ -15,9 +15,10 @@ namespace Shader::Backend::GLSL { [[nodiscard]] std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, Bindings& bindings); -[[nodiscard]] inline std::string EmitGLSL(const Profile& profile, IR::Program& program) { +[[nodiscard]] inline std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program) { Bindings binding; - return EmitGLSL(profile, {}, program, binding); + return EmitGLSL(profile, runtime_info, program, binding); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 79dac8124..b662ae0f7 100755 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -46,6 +46,15 @@ std::string ChooseCbuf(EmitContext& ctx, const IR::Value& binding, std::string_v void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, const IR::Value& offset, u32 num_bits, std::string_view cast = {}, std::string_view bit_offset = {}) { + if (binding.IsImmediate()) { + const u32 binding_index{binding.U32()}; + const u32 max_num_cbufs{ctx.runtime_info.max_num_cbufs}; + if (binding_index >= max_num_cbufs) { + // cbuf index exceeds device limit + ctx.Add("{}=0u;", ret); + return; + } + } const bool is_immediate{offset.IsImmediate()}; const bool component_indexing_bug{!is_immediate && ctx.profile.has_gl_component_indexing_bug}; if (is_immediate) { diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp index 1613fc0b3..4e5936c78 100755 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/div_ceil.h" #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/glsl/glsl_emit_context.h" #include "shader_recompiler/frontend/ir/program.h" @@ -430,10 +431,18 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) { return; } for (const auto& desc : info.constant_buffer_descriptors) { + if (bindings.uniform_buffer >= runtime_info.max_num_cbufs) { + LOG_WARNING(Shader_GLSL, "Constant buffer binding index {} exceeds device limit of {}", + bindings.uniform_buffer, runtime_info.max_num_cbufs); + bindings.uniform_buffer += desc.count; + continue; + } const auto cbuf_type{profile.has_gl_cbuf_ftou_bug ? "uvec4" : "vec4"}; + const u32 cbuf_used_size{Common::DivCeil(info.constant_buffer_used_sizes[desc.index], 16U)}; + const u32 cbuf_binding_size{info.uses_global_memory ? 0x1000U : cbuf_used_size}; header += fmt::format("layout(std140,binding={}) uniform {}_cbuf_{}{{{} {}_cbuf{}[{}];}};", bindings.uniform_buffer, stage_name, desc.index, cbuf_type, - stage_name, desc.index, 4 * 1024); + stage_name, desc.index, cbuf_binding_size); bindings.uniform_buffer += desc.count; } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 497c02da7..a6980bc82 100755 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -38,4 +38,10 @@ constexpr u32 RENDERAREA_LAYOUT_OFFSET = offsetof(RenderAreaLayout, render_area) return EmitSPIRV(profile, {}, program, binding); } +[[nodiscard]] inline std::vector EmitSPIRV(const Profile& profile, + const RuntimeInfo& runtime_info, + IR::Program& program) { + Bindings binding; + return EmitSPIRV(profile, runtime_info, program, binding); +} } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 29a40bdfd..95293ea12 100755 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -122,25 +122,24 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, if (!binding.IsImmediate()) { return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset); } - - const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; + const bool is_float{UniformDefinitions::IsFloat(member_ptr)}; + const Id zero_val{is_float ? ctx.Const(0.0f) : ctx.Const(0u)}; + const u32 binding_index{binding.U32()}; + const u32 max_num_cbufs{ctx.runtime_info.max_num_cbufs}; + if (binding_index >= max_num_cbufs) { + // cbuf index exceeds device limit + return zero_val; + } + const Id cbuf{ctx.cbufs[binding_index].*member_ptr}; const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)}; - const Id val = ctx.OpLoad(result_type, access_chain); - + const Id val{ctx.OpLoad(result_type, access_chain)}; if (offset.IsImmediate() || !ctx.profile.has_broken_robust) { return val; } - - const auto is_float = UniformDefinitions::IsFloat(member_ptr); - const auto num_elements = UniformDefinitions::NumElements(member_ptr); - const std::array zero_vec{ - is_float ? ctx.Const(0.0f) : ctx.Const(0u), - is_float ? ctx.Const(0.0f) : ctx.Const(0u), - is_float ? ctx.Const(0.0f) : ctx.Const(0u), - is_float ? ctx.Const(0.0f) : ctx.Const(0u), - }; - const Id cond = ctx.OpULessThanEqual(ctx.TypeBool(), buffer_offset, ctx.Const(0xFFFFu)); - const Id zero = ctx.OpCompositeConstruct(result_type, std::span(zero_vec.data(), num_elements)); + const auto num_elements{UniformDefinitions::NumElements(member_ptr)}; + const std::array zero_vec{zero_val, zero_val, zero_val, zero_val}; + const Id cond{ctx.OpULessThanEqual(ctx.TypeBool(), buffer_offset, ctx.Const(0xFFFFu))}; + const Id zero{ctx.OpCompositeConstruct(result_type, std::span(zero_vec.data(), num_elements))}; return ctx.OpSelect(result_type, cond, val, zero); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 7a07c8862..f1f404645 100755 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -278,6 +278,12 @@ void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinition ctx.uniform_types.*member_type = uniform_type; for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + if (desc.index + desc.count > ctx.runtime_info.max_num_cbufs) { + LOG_WARNING(Shader_SPIRV, "Constant buffer binding index {} exceeds device limit of {}", + desc.index, ctx.runtime_info.max_num_cbufs); + binding += desc.count; + continue; + } const Id id{ctx.AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; ctx.Decorate(id, spv::Decoration::Binding, binding); ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index d632deca6..a79f3f389 100755 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -62,8 +62,8 @@ struct TransformFeedbackVarying { struct RuntimeInfo { std::array generic_input_types{}; - VaryingState previous_stage_stores; - std::map previous_stage_legacy_stores_mapping; + VaryingState previous_stage_stores{}; + std::map previous_stage_legacy_stores_mapping{}; bool convert_depth_mode{}; bool force_early_z{}; @@ -74,8 +74,8 @@ struct RuntimeInfo { InputTopology input_topology{}; - std::optional fixed_state_point_size; - std::optional alpha_test_func; + std::optional fixed_state_point_size{}; + std::optional alpha_test_func{}; float alpha_test_reference{}; /// Static Y negate value @@ -86,6 +86,9 @@ struct RuntimeInfo { /// Transform feedback state for each varying std::array xfb_varyings{}; u32 xfb_count{0}; + + /// Maximum number of UBO/CBUF bindings allowed by the host device + u32 max_num_cbufs{32}; }; } // namespace Shader diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index eb6f5d92d..fcee1d3af 100755 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -72,7 +72,8 @@ Shader::OutputTopology MaxwellToOutputTopology(Maxwell::PrimitiveTopology topolo Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, const Shader::IR::Program* previous_program, - bool glasm_use_storage_buffers, bool use_assembly_shaders) { + bool glasm_use_storage_buffers, bool use_assembly_shaders, + u32 max_num_cbufs) { Shader::RuntimeInfo info; if (previous_program) { info.previous_stage_stores = previous_program->info.stores; @@ -152,6 +153,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, break; } info.glasm_use_storage_buffers = glasm_use_storage_buffers; + info.max_num_cbufs = max_num_cbufs; return info; } @@ -522,8 +524,9 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const auto runtime_info{ - MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; + const u32 max_num_cbufs{device.GetMaxUniformBuffers(program.stage)}; + const auto runtime_info{MakeRuntimeInfo( + key, program, previous_program, glasm_use_storage_buffers, use_glasm, max_num_cbufs)}; switch (device.GetShaderBackend()) { case Settings::ShaderBackend::Glsl: ConvertLegacyToGeneric(program, runtime_info); @@ -580,20 +583,21 @@ std::unique_ptr ShaderCache::CreateComputePipeline( auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; const u32 num_storage_buffers{Shader::NumDescriptors(program.info.storage_buffers_descriptors)}; - Shader::RuntimeInfo info; - info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - + const Shader::RuntimeInfo info{ + .glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(), + .max_num_cbufs = device.GetMaxUniformBuffers(program.stage), + }; std::string code{}; std::vector code_spirv; switch (device.GetShaderBackend()) { case Settings::ShaderBackend::Glsl: - code = EmitGLSL(profile, program); + code = EmitGLSL(profile, info, program); break; case Settings::ShaderBackend::Glasm: code = EmitGLASM(profile, info, program); break; case Settings::ShaderBackend::SpirV: - code_spirv = EmitSPIRV(profile, program); + code_spirv = EmitSPIRV(profile, info, program); break; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1adb6d90a..0b614cc22 100755 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -145,7 +145,8 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde Shader::RuntimeInfo MakeRuntimeInfo(std::span programs, const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program, - const Shader::IR::Program* previous_program) { + const Shader::IR::Program* previous_program, + u32 max_num_cbufs) { Shader::RuntimeInfo info; if (previous_program) { info.previous_stage_stores = previous_program->info.stores; @@ -261,6 +262,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span program } info.force_early_z = key.state.early_z != 0; info.y_negate = key.state.y_negate != 0; + info.max_num_cbufs = max_num_cbufs; return info; } @@ -655,6 +657,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const Shader::IR::Program* previous_stage{}; Shader::Backend::Bindings binding; + const u32 max_num_cbufs{static_cast(device.GetMaxPerStageUniformBuffers())}; for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { const bool is_emulated_stage = layer_source_program != nullptr && @@ -668,7 +671,8 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; + const auto runtime_info{ + MakeRuntimeInfo(programs, key, program, previous_stage, max_num_cbufs)}; ConvertLegacyToGeneric(program, runtime_info); const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); @@ -764,7 +768,10 @@ std::unique_ptr PipelineCache::CreateComputePipeline( } auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; - const std::vector code{EmitSPIRV(profile, program)}; + const Shader::RuntimeInfo info{ + .max_num_cbufs = static_cast(device.GetMaxPerStageUniformBuffers()), + }; + const std::vector code{EmitSPIRV(profile, info, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 5e8c431dd..8f461e238 100755 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -299,6 +299,11 @@ public: return properties.properties.limits.maxComputeSharedMemorySize; } + /// Returns the maximum number of uniform buffers allowed per stage. + VkDeviceSize GetMaxPerStageUniformBuffers() const { + return properties.properties.limits.maxPerStageDescriptorUniformBuffers; + } + /// Returns float control properties of the device. const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { return properties.float_controls;