early-access version 2642

This commit is contained in:
pineappleEA 2022-04-04 00:41:58 +02:00
parent 3236e6f599
commit ec43dfdade
9 changed files with 64 additions and 58 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 2641.
This is the source code for early-access 2642.
## Legal Notice

View file

@ -10,25 +10,49 @@
#include "common/uint128.h"
#include "common/x64/native_clock.h"
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace Common {
#ifdef _MSC_VER
// Reads the time-stamp counter through the __rdtsc() intrinsic, bracketed by
// an lfence on each side (variant built under _MSC_VER).
//
// Returns the raw 64-bit counter value produced by __rdtsc().
__forceinline static u64 FencedRDTSC() {
// Fence ahead of the read; intended to keep the counter read from being
// executed before earlier instructions have finished.
_mm_lfence();
// _ReadWriteBarrier() is a compiler-level barrier: it emits no instruction and
// only restricts how the compiler may reorder memory accesses across it.
_ReadWriteBarrier();
const u64 result = __rdtsc();
// Same fence + compiler barrier pair after the read, so later work is not
// started before the counter has been sampled.
_mm_lfence();
_ReadWriteBarrier();
return result;
}
#else
// Reads the time-stamp counter with inline assembly, issuing an lfence both
// before and after the rdtsc instruction (variant built when _MSC_VER is not
// defined).
//
// Returns the full 64-bit counter value.
static u64 FencedRDTSC() {
u64 result;
// rdtsc delivers the low 32 bits in EAX and the high 32 bits in EDX. The asm
// shifts RDX left by 32 and ORs it into operand %0, which the "=a" constraint
// binds to the A register, so `result` ends up holding the combined value.
//
// Clobbers: "rdx" is overwritten by rdtsc and then used as scratch, "cc"
// because shl/or modify the flags, and "memory" so the compiler does not move
// or cache memory accesses across the statement.
asm volatile("lfence\n\t"
"rdtsc\n\t"
"shl $32, %%rdx\n\t"
"or %%rdx, %0\n\t"
"lfence"
: "=a"(result)
:
: "rdx", "memory", "cc");
return result;
}
#endif
u64 EstimateRDTSCFrequency() {
// Take an initial rdtsc measurement and discard the result.
_mm_mfence();
__rdtsc();
FencedRDTSC();
std::this_thread::sleep_for(std::chrono::milliseconds{1});
_mm_mfence();
__rdtsc();
FencedRDTSC();
// Get the current time.
const auto start_time = std::chrono::steady_clock::now();
_mm_mfence();
const u64 tsc_start = __rdtsc();
const u64 tsc_start = FencedRDTSC();
// Wait for 200 milliseconds.
std::this_thread::sleep_for(std::chrono::milliseconds{200});
const auto end_time = std::chrono::steady_clock::now();
_mm_mfence();
const u64 tsc_end = __rdtsc();
const u64 tsc_end = FencedRDTSC();
// Calculate differences.
const u64 timer_diff = static_cast<u64>(
std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
@ -42,8 +66,7 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
u64 rtsc_frequency_)
: WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
rtsc_frequency_} {
_mm_mfence();
time_point.inner.last_measure = __rdtsc();
time_point.inner.last_measure = FencedRDTSC();
time_point.inner.accumulated_ticks = 0U;
ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency);
us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency);
@ -58,8 +81,7 @@ u64 NativeClock::GetRTSC() {
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
do {
_mm_mfence();
const u64 current_measure = __rdtsc();
const u64 current_measure = FencedRDTSC();
u64 diff = current_measure - current_time_point.inner.last_measure;
diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
@ -80,8 +102,7 @@ void NativeClock::Pause(bool is_paused) {
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
do {
new_time_point.pack = current_time_point.pack;
_mm_mfence();
new_time_point.inner.last_measure = __rdtsc();
new_time_point.inner.last_measure = FencedRDTSC();
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
current_time_point.pack, current_time_point.pack));
}

View file

@ -40,9 +40,6 @@ GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std
}
GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
// Mark any pre-existing rasterizer memory in this range as remapped
rasterizer->ModifyGPUMemory(gpu_addr, size);
const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
if (it != map_ranges.end() && it->first == gpu_addr) {
it->second = size;

View file

@ -200,7 +200,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
});
}
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty();
const bool is_rescaling = info.uses_rescaling_uniform;
scheduler.Record([this, descriptor_data, is_rescaling,
rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);

View file

@ -238,6 +238,7 @@ GraphicsPipeline::GraphicsPipeline(
enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
num_textures += Shader::NumDescriptors(info->texture_descriptors);
uses_rescale_unfiorm |= info->uses_rescaling_uniform;
}
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
@ -471,7 +472,8 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
});
}
const bool is_rescaling{texture_cache.IsRescaling()};
const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)};
const bool update_rescaling{uses_rescale_unfiorm ? scheduler.UpdateRescaling(is_rescaling)
: false};
const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(),
@ -479,10 +481,12 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
if (bind_pipeline) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
}
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
rescaling_data.data());
if (update_rescaling) {
if (uses_rescale_unfiorm) {
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
rescaling_data.data());
}
if (uses_rescale_unfiorm && update_rescaling) {
const f32 config_down_factor{Settings::values.resolution_info.down_factor};
const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f};
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,

View file

@ -151,6 +151,7 @@ private:
std::mutex build_mutex;
std::atomic_bool is_built{false};
bool uses_push_descriptor{false};
bool uses_rescale_unfiorm{false};
};
} // namespace Vulkan

View file

@ -234,12 +234,9 @@ void RasterizerVulkan::Clear() {
const VkExtent2D render_area = framebuffer->RenderArea();
scheduler.RequestRenderpass(framebuffer);
u32 up_scale = 1;
u32 down_shift = 0;
if (texture_cache.IsRescaling()) {
up_scale = Settings::values.resolution_info.up_scale;
down_shift = Settings::values.resolution_info.down_shift;
}
const bool is_rescaling = texture_cache.IsRescaling();
const u32 up_scale = is_rescaling ? Settings::values.resolution_info.up_scale : 1U;
const u32 down_shift = is_rescaling ? Settings::values.resolution_info.down_shift : 0U;
UpdateViewportsState(regs);
VkClearRect clear_rect{
@ -695,12 +692,9 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
if (!state_tracker.TouchScissors()) {
return;
}
u32 up_scale = 1;
u32 down_shift = 0;
if (texture_cache.IsRescaling()) {
up_scale = Settings::values.resolution_info.up_scale;
down_shift = Settings::values.resolution_info.down_shift;
}
const bool is_rescaling = texture_cache.IsRescaling();
const u32 up_scale = is_rescaling ? Settings::values.resolution_info.up_scale : 1U;
const u32 down_shift = is_rescaling ? Settings::values.resolution_info.down_shift : 0U;
const std::array scissors{
GetScissorState(regs, 0, up_scale, down_shift),
GetScissorState(regs, 1, up_scale, down_shift),

View file

@ -328,7 +328,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
}
const bool rescaled = RescaleRenderTargets(is_clear);
if (is_rescaling != rescaled) {
const auto& resolution_info = Settings::values.resolution_info;
if (resolution_info.active && is_rescaling != rescaled) {
flags[Dirty::RescaleViewports] = true;
flags[Dirty::RescaleScissors] = true;
is_rescaling = rescaled;
@ -345,12 +346,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
for (size_t index = 0; index < NUM_RT; ++index) {
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
}
u32 up_scale = 1;
u32 down_shift = 0;
if (is_rescaling) {
up_scale = Settings::values.resolution_info.up_scale;
down_shift = Settings::values.resolution_info.down_shift;
}
const u32 up_scale = is_rescaling ? resolution_info.up_scale : 1U;
const u32 down_shift = is_rescaling ? resolution_info.down_shift : 0U;
render_targets.size = Extent2D{
(maxwell3d.regs.render_area.width * up_scale) >> down_shift,
(maxwell3d.regs.render_area.height * up_scale) >> down_shift,
@ -454,20 +451,15 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
});
for (const ImageId image_id : images) {
DownloadImage(image_id);
Image& image = slot_images[image_id];
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
}
}
/// Downloads the contents of one host image and writes them to guest memory.
/// @param image_id Slot id of the image to download; used to index slot_images.
template <class P>
void TextureCache<P>::DownloadImage(ImageId image_id) {
Image& image = slot_images[image_id];
// Request a download staging buffer sized from the image's unswizzled byte count.
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
// Copy descriptors built from the image info; the same list is passed to both
// the download and the swizzle step below.
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
// NOTE(review): presumably blocks until the download has completed so that
// map.mapped_span is safe to read — confirm against the runtime implementation.
runtime.Finish();
// Write the staged data out through gpu_memory at the image's GPU address.
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
}
template <class P>
void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
std::vector<ImageId> deleted_images;
@ -1063,7 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
for (const ImageId overlap_id : ignore_textures) {
Image& overlap = slot_images[overlap_id];
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
DownloadImage(overlap_id);
UNIMPLEMENTED();
}
if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap, overlap_id);

View file

@ -139,9 +139,6 @@ public:
/// Download contents of host images to guest memory in a region
void DownloadMemory(VAddr cpu_addr, size_t size);
/// Download contents of host images to guest memory
void DownloadImage(ImageId image_id);
/// Remove images in a region
void UnmapMemory(VAddr cpu_addr, size_t size);