diff --git a/CMakeLists.txt b/CMakeLists.txt index a2cc8a813..83568f8a3 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -305,7 +305,7 @@ find_package(ZLIB 1.2 REQUIRED) find_package(zstd 1.5 REQUIRED) if (NOT YUZU_USE_EXTERNAL_VULKAN_HEADERS) - find_package(Vulkan 1.3.256 REQUIRED) + find_package(Vulkan 1.3.274 REQUIRED) endif() if (ENABLE_LIBUSB) diff --git a/README.md b/README.md index 592664766..b5bf598a9 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 4028. +This is the source code for early-access 4029. ## Legal Notice diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index 1311e66a9..123b3da7e 100755 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -39,7 +39,7 @@ fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) { } using namespace Common::Literals; -constexpr u32 StackSize = 32_KiB; +constexpr u32 StackSize = 128_KiB; } // namespace diff --git a/src/core/arm/nce/interpreter_visitor.cpp b/src/core/arm/nce/interpreter_visitor.cpp index 8e81c66a5..def888d15 100755 --- a/src/core/arm/nce/interpreter_visitor.cpp +++ b/src/core/arm/nce/interpreter_visitor.cpp @@ -5,8 +5,6 @@ #include "common/bit_cast.h" #include "core/arm/nce/interpreter_visitor.h" -#include - namespace Core { template @@ -249,6 +247,7 @@ bool InterpreterVisitor::LDR_lit_fpsimd(Imm<2> opc, Imm<19> imm19, Vec Vt) { return false; } + // Size in bytes const u64 size = 4 << opc.ZeroExtend(); const u64 offset = imm19.SignExtend() << 2; const u64 address = this->GetPc() + offset; @@ -530,7 +529,7 @@ bool InterpreterVisitor::SIMDImmediate(bool wback, bool postindex, size_t scale, } case MemOp::Load: { u128 data{}; - m_memory.ReadBlock(address, &data, datasize); + m_memory.ReadBlock(address, &data, datasize / 8); this->SetVec(Vt, data); break; } diff --git a/src/core/arm/nce/visitor_base.h b/src/core/arm/nce/visitor_base.h index 8fb032912..6a2be3d9b 100755 --- a/src/core/arm/nce/visitor_base.h +++ b/src/core/arm/nce/visitor_base.h @@ -4,9 +4,15 @@ #pragma once +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" + #include +#include #include +#pragma GCC diagnostic pop + namespace Core { class VisitorBase { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 104a4d329..706a479a3 100755 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -74,9 +74,17 @@ spv::ImageFormat GetImageFormat(ImageFormat format) { throw InvalidArgument("Invalid image format {}", format); } +Id GetImageSampledType(EmitContext& ctx, const ImageDescriptor& desc) { + if (desc.is_float) { + return ctx.F32[1]; + } else { + return ctx.U32[1]; + } +} + Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) { const spv::ImageFormat format{GetImageFormat(desc.format)}; - const Id type{ctx.U32[1]}; + const Id type{GetImageSampledType(ctx, desc)}; switch (desc.type) { case TextureType::Color1D: return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 3ae9196bf..85f91a86c 100755 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -512,7 +512,7 @@ OPCODE(ImageQueryDimensions, U32x4, Opaq OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageRead, U32x4, Opaque, Opaque, ) -OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, Opaque, ) OPCODE(IsTextureScaled, U1, U32, ) OPCODE(IsImageScaled, U1, U32, ) diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 9cea8a5d9..384f3456b 100755 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -19,8 +19,10 @@ struct HostTranslateInfo { u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry ///< passthrough shaders - bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional - ///< control flow + bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional + ///< control flow + bool support_ufloat_write_as_uint{}; ///< True when the device supports writing float images + ///< as bitcasts to uint }; } // namespace Shader diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 9ce917c62..abdfe1686 100755 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -372,6 +372,10 @@ TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAdd return env.ReadTexturePixelFormat(GetTextureHandle(env, cbuf)); } +bool TexturePixelFormatIsFloat(Environment& env, const ConstBufferAddr& cbuf) { + return ReadTexturePixelFormat(env, cbuf) == TexturePixelFormat::B10G11R11_FLOAT; +} + class Descriptors { public: explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, @@ -428,8 +432,9 @@ public: return desc.type == existing.type && desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && - desc.size_shift == existing.size_shift; + desc.size_shift == existing.size_shift && desc.is_float == existing.is_float; })}; + // TODO: handle is_float? image_descriptors[index].is_written |= desc.is_written; image_descriptors[index].is_read |= desc.is_read; return index; @@ -500,6 +505,19 @@ void PatchTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_ ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast(w)), max_value)); inst.ReplaceUsesWith(converted); } + +void PatchSmallFloatImageWrite(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + + const IR::Value old_value{inst.Arg(2)}; + const IR::F32 x(ir.BitCast(IR::U32(ir.CompositeExtract(old_value, 0)))); + const IR::F32 y(ir.BitCast(IR::U32(ir.CompositeExtract(old_value, 1)))); + const IR::F32 z(ir.BitCast(IR::U32(ir.CompositeExtract(old_value, 2)))); + const IR::F32 w(ir.BitCast(IR::U32(ir.CompositeExtract(old_value, 3)))); + const IR::Value converted = ir.CompositeConstruct(x, y, z, w); + inst.SetArg(2, converted); +} + } // Anonymous namespace void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info) { @@ -531,6 +549,9 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo inst->ReplaceOpcode(IndexedInstruction(*inst)); const auto& cbuf{texture_inst.cbuf}; + const bool is_float_write{!host_info.support_ufloat_write_as_uint && + inst->GetOpcode() == IR::Opcode::ImageWrite && + TexturePixelFormatIsFloat(env, cbuf)}; auto flags{inst->Flags()}; bool is_multisample{false}; switch (inst->GetOpcode()) { @@ -603,6 +624,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo .format = flags.image_format, .is_written = is_written, .is_read = is_read, + .is_float = is_float_write, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, @@ -662,6 +684,10 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo PatchTexelFetch(*texture_inst.block, *texture_inst.inst, pixel_format); } } + + if (is_float_write) { + PatchSmallFloatImageWrite(*texture_inst.block, *inst); + } } } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 501fd84a6..fad65ab73 100755 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -42,6 +42,7 @@ enum class TexturePixelFormat : u32 { R16G16B16A16_SNORM, R16G16_SNORM, R16_SNORM, + B10G11R11_FLOAT, OTHER }; @@ -129,6 +130,7 @@ struct ImageDescriptor { ImageFormat format; bool is_written; bool is_read; + bool is_float; u32 cbuf_index; u32 cbuf_offset; u32 count; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c03f5b230..760af262b 100755 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -417,7 +417,7 @@ void RasterizerOpenGL::DispatchCompute() { void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { const auto query_cache_type = MaxwellToVideoCoreQuery(type); if (!query_cache_type.has_value()) { - UNIMPLEMENTED_MSG("Reset query type: {}", type); + UNIMPLEMENTED_IF_MSG(type != VideoCommon::QueryType::Payload, "Reset query type: {}", type); return; } query_cache.ResetCounter(*query_cache_type); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b0aa5d02e..83c0d6cae 100755 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -245,6 +245,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .min_ssbo_alignment = static_cast(device.GetShaderStorageBufferAlignment()), .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), .support_conditional_barrier = device.SupportsConditionalBarriers(), + .support_ufloat_write_as_uint = true, } { if (use_asynchronous_shaders) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index e903f4582..14e2133ee 100755 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -78,8 +78,15 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo } } // Anonymous namespace -Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) - : VideoCommon::BufferBase(null_params), tracker{4096} {} +Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params) + : VideoCommon::BufferBase(null_params), tracker{4096} { + if (runtime.device.HasNullDescriptor()) { + return; + } + device = &runtime.device; + buffer = runtime.CreateNullBuffer(); + is_null = true; +} Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) @@ -93,8 +100,12 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { if (!device) { - // Null buffer, return a null descriptor + // Null buffer supported, return a null descriptor return VK_NULL_HANDLE; + } else if (is_null) { + // Null buffer not supported, adjust offset and size + offset = 0; + size = 0; } const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { return offset == view.offset && size == view.size && format == view.format; @@ -627,9 +638,12 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings< } void BufferCacheRuntime::ReserveNullBuffer() { - if (null_buffer) { - return; + if (!null_buffer) { + null_buffer = CreateNullBuffer(); } +} + +vk::Buffer BufferCacheRuntime::CreateNullBuffer() { VkBufferCreateInfo create_info{ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -644,15 +658,17 @@ void BufferCacheRuntime::ReserveNullBuffer() { if (device.IsExtTransformFeedbackSupported()) { create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; } - null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal); + vk::Buffer ret = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal); if (device.HasDebuggingToolAttached()) { - null_buffer.SetObjectNameEXT("Null buffer"); + ret.SetObjectNameEXT("Null buffer"); } scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) { + scheduler.Record([buffer = *ret](vk::CommandBuffer cmdbuf) { cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0); }); + + return ret; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 7fe8a3045..57e1c789c 100755 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -63,6 +63,7 @@ private: vk::Buffer buffer; std::vector views; VideoCommon::UsageTracker tracker; + bool is_null{}; }; class QuadArrayIndexBuffer; @@ -151,6 +152,7 @@ private: } void ReserveNullBuffer(); + vk::Buffer CreateNullBuffer(); const Device& device; MemoryAllocator& memory_allocator; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2f32b7d09..6a4f52b14 100755 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -388,6 +388,9 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .min_ssbo_alignment = static_cast(device.GetStorageBufferAlignment()), .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .support_conditional_barrier = device.SupportsConditionalBarriers(), + .support_ufloat_write_as_uint = driver_id != VK_DRIVER_ID_QUALCOMM_PROPRIETARY && + driver_id != VK_DRIVER_ID_MESA_TURNIP && + driver_id != VK_DRIVER_ID_ARM_PROPRIETARY, }; if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) { diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 297878d94..8bee3cf84 100755 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -76,6 +76,8 @@ static Shader::TexturePixelFormat ConvertTexturePixelFormat(const Tegra::Texture return Shader::TexturePixelFormat::R16G16_SNORM; case VideoCore::Surface::PixelFormat::R16_SNORM: return Shader::TexturePixelFormat::R16_SNORM; + case VideoCore::Surface::PixelFormat::B10G11R11_FLOAT: + return Shader::TexturePixelFormat::B10G11R11_FLOAT; default: return Shader::TexturePixelFormat::OTHER; } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index b85edd277..b33b3f277 100755 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -377,6 +377,8 @@ const char* ToString(VkResult result) noexcept { return "VK_OPERATION_DEFERRED_KHR"; case VkResult::VK_OPERATION_NOT_DEFERRED_KHR: return "VK_OPERATION_NOT_DEFERRED_KHR"; + case VkResult::VK_ERROR_INVALID_VIDEO_STD_PARAMETERS_KHR: + return "VK_ERROR_INVALID_VIDEO_STD_PARAMETERS_KHR"; case VkResult::VK_PIPELINE_COMPILE_REQUIRED_EXT: return "VK_PIPELINE_COMPILE_REQUIRED_EXT"; case VkResult::VK_RESULT_MAX_ENUM: