early-access version 2642
This commit is contained in:
parent
3236e6f599
commit
ec43dfdade
9 changed files with 64 additions and 58 deletions
|
@ -1,7 +1,7 @@
|
|||
yuzu emulator early access
|
||||
=============
|
||||
|
||||
This is the source code for early-access 2641.
|
||||
This is the source code for early-access 2642.
|
||||
|
||||
## Legal Notice
|
||||
|
||||
|
|
|
@ -10,25 +10,49 @@
|
|||
#include "common/uint128.h"
|
||||
#include "common/x64/native_clock.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
namespace Common {
|
||||
|
||||
#ifdef _MSC_VER
|
||||
// MSVC variant: returns the value of __rdtsc() with an _mm_lfence() and a
// _ReadWriteBarrier() issued both before and after the read.
// NOTE(review): the fences are presumably there to stop the timestamp read from
// being reordered against surrounding code by the CPU (lfence) and by the
// compiler (_ReadWriteBarrier) - confirm against the Intel/MSVC documentation.
__forceinline static u64 FencedRDTSC() {
|
||||
_mm_lfence();
|
||||
_ReadWriteBarrier();
|
||||
// The counter value is captured between the two fence/barrier pairs.
const u64 result = __rdtsc();
|
||||
_mm_lfence();
|
||||
_ReadWriteBarrier();
|
||||
return result;
|
||||
}
|
||||
#else
|
||||
// Non-MSVC variant: the same lfence / rdtsc / lfence sequence written as a single
// inline-asm statement, returning the combined 64-bit counter value.
static u64 FencedRDTSC() {
|
||||
u64 result;
|
||||
// `asm volatile` plus the "memory" clobber below keep the compiler from
// dropping the statement or moving memory accesses across it.
asm volatile("lfence\n\t"
|
||||
"rdtsc\n\t"
|
||||
// rdtsc splits the counter across EDX:EAX (per the x86 ISA); shift the RDX half
// up by 32 bits and OR it into the output register to form one 64-bit value.
"shl $32, %%rdx\n\t"
|
||||
"or %%rdx, %0\n\t"
|
||||
"lfence"
|
||||
// Output: `result` is bound to RAX via the "a" constraint.
: "=a"(result)
|
||||
:
|
||||
// Clobbers: RDX is overwritten by rdtsc/shl, and shl/or modify the flags ("cc").
: "rdx", "memory", "cc");
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
u64 EstimateRDTSCFrequency() {
|
||||
// Discard the first result measuring the rdtsc.
|
||||
_mm_mfence();
|
||||
__rdtsc();
|
||||
FencedRDTSC();
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds{1});
|
||||
_mm_mfence();
|
||||
__rdtsc();
|
||||
FencedRDTSC();
|
||||
|
||||
// Get the current time.
|
||||
const auto start_time = std::chrono::steady_clock::now();
|
||||
_mm_mfence();
|
||||
const u64 tsc_start = __rdtsc();
|
||||
const u64 tsc_start = FencedRDTSC();
|
||||
// Wait for 200 milliseconds.
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds{200});
|
||||
const auto end_time = std::chrono::steady_clock::now();
|
||||
_mm_mfence();
|
||||
const u64 tsc_end = __rdtsc();
|
||||
const u64 tsc_end = FencedRDTSC();
|
||||
// Calculate differences.
|
||||
const u64 timer_diff = static_cast<u64>(
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
|
||||
|
@ -42,8 +66,7 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
|
|||
u64 rtsc_frequency_)
|
||||
: WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
|
||||
rtsc_frequency_} {
|
||||
_mm_mfence();
|
||||
time_point.inner.last_measure = __rdtsc();
|
||||
time_point.inner.last_measure = FencedRDTSC();
|
||||
time_point.inner.accumulated_ticks = 0U;
|
||||
ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency);
|
||||
us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency);
|
||||
|
@ -58,8 +81,7 @@ u64 NativeClock::GetRTSC() {
|
|||
|
||||
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
|
||||
do {
|
||||
_mm_mfence();
|
||||
const u64 current_measure = __rdtsc();
|
||||
const u64 current_measure = FencedRDTSC();
|
||||
u64 diff = current_measure - current_time_point.inner.last_measure;
|
||||
diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
|
||||
new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
|
||||
|
@ -80,8 +102,7 @@ void NativeClock::Pause(bool is_paused) {
|
|||
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
|
||||
do {
|
||||
new_time_point.pack = current_time_point.pack;
|
||||
_mm_mfence();
|
||||
new_time_point.inner.last_measure = __rdtsc();
|
||||
new_time_point.inner.last_measure = FencedRDTSC();
|
||||
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
|
||||
current_time_point.pack, current_time_point.pack));
|
||||
}
|
||||
|
|
|
@ -40,9 +40,6 @@ GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std
|
|||
}
|
||||
|
||||
GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
|
||||
// Mark any pre-existing rasterizer memory in this range as remapped
|
||||
rasterizer->ModifyGPUMemory(gpu_addr, size);
|
||||
|
||||
const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
|
||||
if (it != map_ranges.end() && it->first == gpu_addr) {
|
||||
it->second = size;
|
||||
|
|
|
@ -200,7 +200,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
|||
});
|
||||
}
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty();
|
||||
const bool is_rescaling = info.uses_rescaling_uniform;
|
||||
scheduler.Record([this, descriptor_data, is_rescaling,
|
||||
rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||
|
|
|
@ -238,6 +238,7 @@ GraphicsPipeline::GraphicsPipeline(
|
|||
enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
|
||||
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
|
||||
num_textures += Shader::NumDescriptors(info->texture_descriptors);
|
||||
uses_rescale_unfiorm |= info->uses_rescaling_uniform;
|
||||
}
|
||||
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
|
||||
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
|
||||
|
@ -471,7 +472,8 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
|
|||
});
|
||||
}
|
||||
const bool is_rescaling{texture_cache.IsRescaling()};
|
||||
const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)};
|
||||
const bool update_rescaling{uses_rescale_unfiorm ? scheduler.UpdateRescaling(is_rescaling)
|
||||
: false};
|
||||
const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
|
||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(),
|
||||
|
@ -479,10 +481,12 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
|
|||
if (bind_pipeline) {
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
|
||||
}
|
||||
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
|
||||
RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
|
||||
rescaling_data.data());
|
||||
if (update_rescaling) {
|
||||
if (uses_rescale_unfiorm) {
|
||||
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
|
||||
RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
|
||||
rescaling_data.data());
|
||||
}
|
||||
if (uses_rescale_unfiorm && update_rescaling) {
|
||||
const f32 config_down_factor{Settings::values.resolution_info.down_factor};
|
||||
const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f};
|
||||
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
|
||||
|
|
|
@ -151,6 +151,7 @@ private:
|
|||
std::mutex build_mutex;
|
||||
std::atomic_bool is_built{false};
|
||||
bool uses_push_descriptor{false};
|
||||
bool uses_rescale_unfiorm{false};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -234,12 +234,9 @@ void RasterizerVulkan::Clear() {
|
|||
const VkExtent2D render_area = framebuffer->RenderArea();
|
||||
scheduler.RequestRenderpass(framebuffer);
|
||||
|
||||
u32 up_scale = 1;
|
||||
u32 down_shift = 0;
|
||||
if (texture_cache.IsRescaling()) {
|
||||
up_scale = Settings::values.resolution_info.up_scale;
|
||||
down_shift = Settings::values.resolution_info.down_shift;
|
||||
}
|
||||
const bool is_rescaling = texture_cache.IsRescaling();
|
||||
const u32 up_scale = is_rescaling ? Settings::values.resolution_info.up_scale : 1U;
|
||||
const u32 down_shift = is_rescaling ? Settings::values.resolution_info.down_shift : 0U;
|
||||
UpdateViewportsState(regs);
|
||||
|
||||
VkClearRect clear_rect{
|
||||
|
@ -695,12 +692,9 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
|
|||
if (!state_tracker.TouchScissors()) {
|
||||
return;
|
||||
}
|
||||
u32 up_scale = 1;
|
||||
u32 down_shift = 0;
|
||||
if (texture_cache.IsRescaling()) {
|
||||
up_scale = Settings::values.resolution_info.up_scale;
|
||||
down_shift = Settings::values.resolution_info.down_shift;
|
||||
}
|
||||
const bool is_rescaling = texture_cache.IsRescaling();
|
||||
const u32 up_scale = is_rescaling ? Settings::values.resolution_info.up_scale : 1U;
|
||||
const u32 down_shift = is_rescaling ? Settings::values.resolution_info.down_shift : 0U;
|
||||
const std::array scissors{
|
||||
GetScissorState(regs, 0, up_scale, down_shift),
|
||||
GetScissorState(regs, 1, up_scale, down_shift),
|
||||
|
|
|
@ -328,7 +328,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
|||
}
|
||||
|
||||
const bool rescaled = RescaleRenderTargets(is_clear);
|
||||
if (is_rescaling != rescaled) {
|
||||
const auto& resolution_info = Settings::values.resolution_info;
|
||||
if (resolution_info.active && is_rescaling != rescaled) {
|
||||
flags[Dirty::RescaleViewports] = true;
|
||||
flags[Dirty::RescaleScissors] = true;
|
||||
is_rescaling = rescaled;
|
||||
|
@ -345,12 +346,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
|||
for (size_t index = 0; index < NUM_RT; ++index) {
|
||||
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
|
||||
}
|
||||
u32 up_scale = 1;
|
||||
u32 down_shift = 0;
|
||||
if (is_rescaling) {
|
||||
up_scale = Settings::values.resolution_info.up_scale;
|
||||
down_shift = Settings::values.resolution_info.down_shift;
|
||||
}
|
||||
const u32 up_scale = is_rescaling ? resolution_info.up_scale : 1U;
|
||||
const u32 down_shift = is_rescaling ? resolution_info.down_shift : 0U;
|
||||
render_targets.size = Extent2D{
|
||||
(maxwell3d.regs.render_area.width * up_scale) >> down_shift,
|
||||
(maxwell3d.regs.render_area.height * up_scale) >> down_shift,
|
||||
|
@ -454,20 +451,15 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
|
|||
return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
|
||||
});
|
||||
for (const ImageId image_id : images) {
|
||||
DownloadImage(image_id);
|
||||
Image& image = slot_images[image_id];
|
||||
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
image.DownloadMemory(map, copies);
|
||||
runtime.Finish();
|
||||
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
||||
}
|
||||
}
|
||||
|
||||
// Downloads the image stored at slot_images[image_id] through a staging buffer
// and writes the result out at the image's gpu_addr via SwizzleImage.
template <class P>
|
||||
void TextureCache<P>::DownloadImage(ImageId image_id) {
|
||||
Image& image = slot_images[image_id];
|
||||
// Staging buffer is requested with the image's unswizzled byte size.
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
image.DownloadMemory(map, copies);
|
||||
// NOTE(review): Finish() presumably blocks until the download has completed so
// that map.mapped_span is safe to read below - confirm against the runtime.
runtime.Finish();
|
||||
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
|
||||
std::vector<ImageId> deleted_images;
|
||||
|
@ -1063,7 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
|||
for (const ImageId overlap_id : ignore_textures) {
|
||||
Image& overlap = slot_images[overlap_id];
|
||||
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
|
||||
DownloadImage(overlap_id);
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
||||
UntrackImage(overlap, overlap_id);
|
||||
|
|
|
@ -139,9 +139,6 @@ public:
|
|||
/// Download contents of host images to guest memory in a region
|
||||
void DownloadMemory(VAddr cpu_addr, size_t size);
|
||||
|
||||
/// Download contents of host images to guest memory
|
||||
void DownloadImage(ImageId image_id);
|
||||
|
||||
/// Remove images in a region
|
||||
void UnmapMemory(VAddr cpu_addr, size_t size);
|
||||
|
||||
|
|
Loading…
Reference in a new issue