early-access version 4130
This commit is contained in:
parent cc9eed1bd2
commit 491be807d7
13 changed files with 258 additions and 42 deletions
README.md
@@ -1,7 +1,7 @@
 yuzu emulator early access
 =============
 
-This is the source code for early-access 4129.
+This is the source code for early-access 4130.
 
 ## Legal Notice
src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -13,6 +13,7 @@
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/memory.h"
 #include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
 #include "video_core/engines/puller.h"
 #include "video_core/gpu.h"
 #include "video_core/host1x/host1x.h"
@@ -33,6 +34,7 @@ nvhost_gpu::nvhost_gpu(Core::System& system_, EventInterface& events_interface_,
       syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
       channel_state{system.GPU().AllocateChannel()} {
     channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false);
+    channel_state->syncpoint_id = channel_syncpoint;
     sm_exception_breakpoint_int_report_event =
         events_interface.CreateEvent("GpuChannelSMExceptionBreakpointInt");
     sm_exception_breakpoint_pause_report_event =
@@ -157,6 +159,9 @@ NvResult nvhost_gpu::SetErrorNotifier(IoctlSetErrorNotifier& params) {
 
 NvResult nvhost_gpu::SetChannelPriority(IoctlChannelSetPriority& params) {
     channel_priority = params.priority;
+    if (channel_state->initialized) {
+        system.GPU().Scheduler().ChangePriority(channel_state->bind_id, channel_priority);
+    }
     LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority={:X}", channel_priority);
     return NvResult::Success;
 }
@@ -314,6 +319,7 @@ NvResult nvhost_gpu::GetWaitbase(IoctlGetWaitbase& params) {
 NvResult nvhost_gpu::ChannelSetTimeout(IoctlChannelSetTimeout& params) {
     LOG_INFO(Service_NVDRV, "called, timeout=0x{:X}", params.timeout);
 
+    channel_state->timeout = params.timeout;
     return NvResult::Success;
 }
 
@@ -321,6 +327,7 @@ NvResult nvhost_gpu::ChannelSetTimeslice(IoctlSetTimeslice& params) {
     LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice);
 
     channel_timeslice = params.timeslice;
+    channel_state->timeslice = params.timeslice;
 
     return NvResult::Success;
 }
src/video_core/control/channel_state.h
@@ -45,6 +45,12 @@ struct ChannelState {
     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
 
     s32 bind_id = -1;
+    /// Scheduling info
+    u32 syncpoint_id = 0xFFFF;
+    u32 priority = 0;
+    u32 timeslice = 0;
+    u32 timeout = 0;
+
     /// 3D engine
     std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
     /// 2D engine
src/video_core/control/scheduler.cpp
@@ -1,32 +1,204 @@
 // SPDX-FileCopyrightText: 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
 
+#include <atomic>
+#include <deque>
+#include <map>
 #include <memory>
+#include <mutex>
+#include <unordered_map>
 
 #include "common/assert.h"
 #include "video_core/control/channel_state.h"
+#include "common/fiber.h"
 #include "video_core/control/scheduler.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/gpu.h"
 
 namespace Tegra::Control {
-Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}
+
+struct GPFifoContext {
+    bool is_active;
+    bool is_running;
+    std::shared_ptr<Common::Fiber> context;
+    std::deque<CommandList> pending_work;
+    std::mutex guard;
+    s32 bind_id;
+    std::shared_ptr<ChannelState> info;
+    size_t yield_count;
+    size_t scheduled_count;
+};
+
+struct Scheduler::SchedulerImpl {
+    // Fifos
+    std::map<u32, std::list<size_t>, std::greater<u32>> schedule_priority_queue;
+    std::unordered_map<s32, size_t> channel_gpfifo_ids;
+    std::deque<GPFifoContext> gpfifos;
+    std::deque<size_t> free_fifos;
+
+    // Scheduling
+    std::mutex scheduling_guard;
+    std::shared_ptr<Common::Fiber> master_control;
+    bool must_reschedule{};
+    GPFifoContext* current_fifo{};
+};
+
+Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {
+    impl = std::make_unique<SchedulerImpl>();
+}
 
 Scheduler::~Scheduler() = default;
 
+void Scheduler::Init() {
+    impl->master_control = Common::Fiber::ThreadToFiber();
+}
+
+void Scheduler::Resume() {
+    bool pending_work;
+    do {
+        pending_work = false;
+        {
+            std::unique_lock lk(impl->scheduling_guard);
+            impl->current_fifo = nullptr;
+            auto it = impl->schedule_priority_queue.begin();
+            while (it != impl->schedule_priority_queue.end()) {
+                pending_work = ScheduleLevel(it->second);
+                if (pending_work) {
+                    break;
+                }
+                it = std::next(it);
+            }
+            if (pending_work) {
+                impl->must_reschedule = false;
+            }
+        }
+        if (impl->current_fifo) {
+            impl->current_fifo->scheduled_count++;
+            Common::Fiber::YieldTo(impl->master_control, *impl->current_fifo->context);
+        }
+    } while (pending_work);
+}
+
+bool Scheduler::ScheduleLevel(std::list<size_t>& queue) {
+    bool found_anything = false;
+    size_t min_schedule_count = std::numeric_limits<size_t>::max();
+    for (auto id : queue) {
+        auto& fifo = impl->gpfifos[id];
+        std::scoped_lock lk2(fifo.guard);
+        if (!fifo.pending_work.empty() || fifo.is_running) {
+            if (fifo.scheduled_count > min_schedule_count) {
+                continue;
+            }
+            if (fifo.scheduled_count < fifo.yield_count) {
+                fifo.scheduled_count++;
+                continue;
+            }
+            min_schedule_count = fifo.scheduled_count;
+            impl->current_fifo = &fifo;
+            found_anything = true;
+        }
+    }
+    return found_anything;
+}
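Resume() and ScheduleLevel() together implement a cooperative, priority-bucketed round-robin: the master fiber walks the priority buckets from highest to lowest and, within a bucket, picks the runnable fifo with the smallest scheduled_count, while a fifo whose yield_count exceeds its scheduled_count sits out a turn. A standalone sketch of that selection policy, with plain std types standing in for the scheduler's internal state (all names illustrative, not yuzu code):

    #include <cstdint>
    #include <cstdio>
    #include <limits>
    #include <list>
    #include <map>
    #include <vector>

    struct Fifo {
        bool runnable{};               // has pending work or is mid-command-list
        std::size_t yield_count{};     // turn this fifo asked to skip until
        std::size_t scheduled_count{}; // how often it has been picked so far
    };

    // Pick the least-recently-scheduled runnable fifo of one priority level.
    int ScheduleLevel(const std::list<int>& queue, std::vector<Fifo>& fifos) {
        int chosen = -1;
        std::size_t min_count = std::numeric_limits<std::size_t>::max();
        for (int id : queue) {
            Fifo& fifo = fifos[id];
            if (!fifo.runnable) {
                continue;
            }
            if (fifo.scheduled_count > min_count) {
                continue; // a less-recently-run fifo was already found
            }
            if (fifo.scheduled_count < fifo.yield_count) {
                fifo.scheduled_count++; // consume the voluntarily skipped turn
                continue;
            }
            min_count = fifo.scheduled_count;
            chosen = id;
        }
        return chosen;
    }

    int main() {
        std::vector<Fifo> fifos(3);
        fifos[0].runnable = fifos[1].runnable = fifos[2].runnable = true;
        // Highest priority iterates first, as with std::greater<u32> above.
        std::map<std::uint32_t, std::list<int>, std::greater<std::uint32_t>> buckets{
            {2u, {0, 1}}, {1u, {2}}};
        for (int round = 0; round < 4; ++round) {
            for (auto& [prio, queue] : buckets) {
                if (int id = ScheduleLevel(queue, fifos); id >= 0) {
                    fifos[id].scheduled_count++;
                    std::printf("round %d: run fifo %d (prio %u)\n", round, id, prio);
                    break; // restart from the top bucket, like Resume()'s outer loop
                }
            }
        }
    }

Note fifo 2 never runs while the priority-2 fifos have work, which is exactly why Push() below only forces a reschedule when a higher-priority fifo receives work.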
+
+void Scheduler::ChangePriority(s32 channel_id, u32 new_priority) {
+    std::unique_lock lk(impl->scheduling_guard);
+    auto fifo_it = impl->channel_gpfifo_ids.find(channel_id);
+    if (fifo_it == impl->channel_gpfifo_ids.end()) {
+        return;
+    }
+    const size_t fifo_id = fifo_it->second;
+    auto& fifo = impl->gpfifos[fifo_id];
+    const auto old_priority = fifo.info->priority;
+    fifo.info->priority = new_priority;
+    impl->schedule_priority_queue.try_emplace(new_priority);
+    impl->schedule_priority_queue[new_priority].push_back(fifo_id);
+    impl->schedule_priority_queue[old_priority].remove_if(
+        [fifo_id](size_t id) { return id == fifo_id; });
+}
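ChangePriority() just moves the fifo id between buckets of schedule_priority_queue, a std::map keyed with std::greater so that begin() is the highest priority and the resume loop can iterate front-to-back. A tiny illustration of why that comparator matters (values illustrative):

    #include <cstdio>
    #include <functional>
    #include <list>
    #include <map>

    int main() {
        // Same shape as the scheduler's schedule_priority_queue: greater<>
        // makes iteration start at the HIGHEST priority bucket.
        std::map<unsigned, std::list<std::size_t>, std::greater<unsigned>> buckets;
        buckets.try_emplace(1).first->second.push_back(7); // fifo 7, prio 1
        buckets.try_emplace(3).first->second.push_back(4); // fifo 4, prio 3
        buckets.try_emplace(2).first->second.push_back(9); // fifo 9, prio 2
        for (const auto& [prio, fifos] : buckets) {
            std::printf("priority %u: fifo %zu\n", prio, fifos.front());
        }
        // Prints priorities 3, 2, 1 - highest bucket first.
    }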
+
+void Scheduler::Yield() {
+    ASSERT(impl->current_fifo != nullptr);
+    impl->current_fifo->yield_count = impl->current_fifo->scheduled_count + 1;
+    Common::Fiber::YieldTo(impl->current_fifo->context, *impl->master_control);
+    gpu.BindChannel(impl->current_fifo->bind_id);
+}
+
+void Scheduler::CheckStatus() {
+    {
+        std::unique_lock lk(impl->scheduling_guard);
+        if (!impl->must_reschedule) {
+            return;
+        }
+    }
+    Common::Fiber::YieldTo(impl->current_fifo->context, *impl->master_control);
+    gpu.BindChannel(impl->current_fifo->bind_id);
+}
 
 void Scheduler::Push(s32 channel, CommandList&& entries) {
-    std::unique_lock lk(scheduling_guard);
-    auto it = channels.find(channel);
-    ASSERT(it != channels.end());
-    auto channel_state = it->second;
-    gpu.BindChannel(channel_state->bind_id);
-    channel_state->dma_pusher->Push(std::move(entries));
-    channel_state->dma_pusher->DispatchCalls();
+    std::unique_lock lk(impl->scheduling_guard);
+    auto it = impl->channel_gpfifo_ids.find(channel);
+    ASSERT(it != impl->channel_gpfifo_ids.end());
+    auto gpfifo_id = it->second;
+    auto& fifo = impl->gpfifos[gpfifo_id];
+    {
+        std::scoped_lock lk2(fifo.guard);
+        fifo.pending_work.emplace_back(std::move(entries));
+    }
+    if (impl->current_fifo != nullptr && impl->current_fifo->info->priority < fifo.info->priority) {
+        impl->must_reschedule = true;
+    }
 }
+
+void Scheduler::ChannelLoop(size_t gpfifo_id, s32 channel_id) {
+    gpu.BindChannel(channel_id);
+    auto& fifo = impl->gpfifos[gpfifo_id];
+    while (true) {
+        auto* channel_state = fifo.info.get();
+        fifo.guard.lock();
+        while (!fifo.pending_work.empty()) {
+            fifo.is_running = true;
+            {
+                CommandList&& entries = std::move(fifo.pending_work.front());
+                channel_state->dma_pusher->Push(std::move(entries));
+                fifo.pending_work.pop_front();
+            }
+            fifo.guard.unlock();
+            channel_state->dma_pusher->DispatchCalls();
+            CheckStatus();
+            fifo.guard.lock();
+        }
+        fifo.is_running = false;
+        fifo.guard.unlock();
+        Common::Fiber::YieldTo(fifo.context, *impl->master_control);
+        gpu.BindChannel(channel_id);
+    }
+}
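Push() and ChannelLoop() form a producer/consumer pair around fifo.pending_work: the service thread appends a CommandList under fifo.guard, and the channel fiber drains the deque, dropping the lock while it actually dispatches. Setting the fiber machinery aside, the locking pattern is the standard one; a minimal standalone sketch with std::thread in place of Common::Fiber (all names illustrative):

    #include <deque>
    #include <cstdio>
    #include <mutex>
    #include <string>
    #include <thread>

    std::mutex guard;
    std::deque<std::string> pending_work;
    bool done = false;

    // Consumer: drain the queue, unlocking while "dispatching" each item,
    // just as ChannelLoop() unlocks around DispatchCalls(). The real code
    // yields back to the master fiber instead of spinning when idle.
    void ChannelLoop() {
        for (;;) {
            std::unique_lock lk(guard);
            if (!pending_work.empty()) {
                std::string entries = std::move(pending_work.front());
                pending_work.pop_front();
                lk.unlock(); // dispatch outside the lock
                std::printf("dispatch %s\n", entries.c_str());
            } else if (done) {
                return;
            }
        }
    }

    int main() {
        std::thread consumer(ChannelLoop);
        for (int i = 0; i < 3; ++i) { // producer: the Push() side
            std::scoped_lock lk(guard);
            pending_work.emplace_back("command list " + std::to_string(i));
        }
        {
            std::scoped_lock lk(guard);
            done = true;
        }
        consumer.join();
    }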
 
 void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
     s32 channel = new_channel->bind_id;
-    std::unique_lock lk(scheduling_guard);
-    channels.emplace(channel, new_channel);
+    std::unique_lock lk(impl->scheduling_guard);
+
+    size_t new_fifo_id;
+    if (!impl->free_fifos.empty()) {
+        new_fifo_id = impl->free_fifos.front();
+        impl->free_fifos.pop_front();
+    } else {
+        new_fifo_id = impl->gpfifos.size();
+        impl->gpfifos.emplace_back();
+    }
+    auto& new_fifo = impl->gpfifos[new_fifo_id];
+    impl->channel_gpfifo_ids[channel] = new_fifo_id;
+    new_fifo.is_active = true;
+    new_fifo.bind_id = channel;
+    new_fifo.pending_work.clear();
+    new_fifo.info = new_channel;
+    new_fifo.scheduled_count = 0;
+    new_fifo.yield_count = 0;
+    new_fifo.is_running = false;
+    impl->schedule_priority_queue.try_emplace(new_channel->priority);
+    impl->schedule_priority_queue[new_channel->priority].push_back(new_fifo_id);
+    std::function<void()> callback = std::bind(&Scheduler::ChannelLoop, this, new_fifo_id, channel);
+    new_fifo.context = std::make_shared<Common::Fiber>(std::move(callback));
 }
 
 } // namespace Tegra::Control
src/video_core/control/scheduler.h
@@ -3,10 +3,11 @@
 
 #pragma once
 
+#include <list>
 #include <memory>
 #include <mutex>
 #include <unordered_map>
 
 #include "common/common_types.h"
 #include "video_core/control/channel_state.h"
 #include "video_core/dma_pusher.h"
 
 namespace Tegra {
@@ -22,13 +23,26 @@ public:
     explicit Scheduler(GPU& gpu_);
     ~Scheduler();
 
+    void Init();
+
+    void Resume();
+
+    void Yield();
+
     void Push(s32 channel, CommandList&& entries);
 
     void DeclareChannel(std::shared_ptr<ChannelState> new_channel);
 
+    void ChangePriority(s32 channel_id, u32 new_priority);
+
 private:
-    std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
-    std::mutex scheduling_guard;
+    void ChannelLoop(size_t gpfifo_id, s32 channel_id);
+    bool ScheduleLevel(std::list<size_t>& queue);
+    void CheckStatus();
+
+    struct SchedulerImpl;
+    std::unique_ptr<SchedulerImpl> impl;
 
     GPU& gpu;
 };
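As far as it can be read from this diff, the intended calling sequence of the new API is: DeclareChannel() when a channel is allocated, Push() from the CPU/service side to queue work, Init() once on the GPU thread and Resume() on each wake-up, with Yield() available to the puller for cooperative waits. A hypothetical call-order sketch, with a stub class so the sequence compiles standalone (not the real types):

    #include <cstdio>

    struct Scheduler { // stub standing in for Tegra::Control::Scheduler
        void Init() { std::puts("Init: GPU thread becomes the master fiber"); }
        void DeclareChannel(int bind_id) { std::printf("DeclareChannel %d\n", bind_id); }
        void Push(int channel) { std::printf("Push work for channel %d\n", channel); }
        void Resume() { std::puts("Resume: run highest-priority pending fifo"); }
    };

    int main() {
        Scheduler scheduler;
        scheduler.Init();            // GPU thread, once (RunThread)
        scheduler.DeclareChannel(0); // when nvhost_gpu allocates its channel
        scheduler.Push(0);           // ThreadManager::SubmitList
        scheduler.Resume();          // GPU thread, on each SubmitListCommand wake
    }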
src/video_core/engines/puller.cpp
@@ -6,6 +6,7 @@
 #include "common/settings.h"
 #include "core/core.h"
 #include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
@@ -14,6 +15,8 @@
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/engines/puller.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/syncpoint_manager.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 
@@ -60,11 +63,14 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) {
 }
 
 void Puller::ProcessFenceActionMethod() {
+    auto& syncpoint_manager = gpu.Host1x().GetSyncpointManager();
     switch (regs.fence_action.op) {
     case Puller::FenceOperation::Acquire:
         // UNIMPLEMENTED_MSG("Channel Scheduling pending.");
         // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
-        rasterizer->ReleaseFences();
+        while (regs.fence_value >
+               syncpoint_manager.GetGuestSyncpointValue(regs.fence_action.syncpoint_id)) {
+            rasterizer->ReleaseFences();
+            gpu.Scheduler().Yield();
+        }
         break;
     case Puller::FenceOperation::Increment:
         rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
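The Acquire path no longer releases fences once and moves on: it now polls until the guest syncpoint reaches the fence value, yielding the channel fiber on each pass so another channel can run and signal the syncpoint. A standalone sketch of that cooperative wait, with a std::atomic counter and std::this_thread::yield() standing in for the syncpoint manager and the fiber yield (illustrative only):

    #include <atomic>
    #include <cstdint>
    #include <cstdio>
    #include <thread>

    std::atomic<std::uint32_t> syncpoint{0};

    // Same shape as the Acquire case: poll, then yield so whoever
    // increments the syncpoint gets a chance to run.
    void WaitFence(std::uint32_t fence_value) {
        while (fence_value > syncpoint.load(std::memory_order_acquire)) {
            std::this_thread::yield(); // real code: gpu.Scheduler().Yield()
        }
    }

    int main() {
        std::thread signaler([] {
            for (int i = 0; i < 5; ++i) {
                syncpoint.fetch_add(1, std::memory_order_release); // "Increment" op
            }
        });
        WaitFence(5);
        std::puts("fence acquired");
        signaler.join();
    }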
src/video_core/gpu.cpp
@@ -387,6 +387,14 @@ std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
     return impl->AllocateChannel();
 }
 
+Tegra::Control::Scheduler& GPU::Scheduler() {
+    return *impl->scheduler;
+}
+
+const Tegra::Control::Scheduler& GPU::Scheduler() const {
+    return *impl->scheduler;
+}
+
 void GPU::InitChannel(Control::ChannelState& to_init) {
     impl->InitChannel(to_init);
 }
src/video_core/gpu.h
@@ -124,7 +124,8 @@ class KeplerCompute;
 
 namespace Control {
 struct ChannelState;
-}
+class Scheduler;
+} // namespace Control
 
 namespace Host1x {
 class Host1x;
@@ -204,6 +205,12 @@
     /// Returns a const reference to the shader notifier.
     [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const;
 
+    /// Returns GPU Channel Scheduler.
+    [[nodiscard]] Tegra::Control::Scheduler& Scheduler();
+
+    /// Returns GPU Channel Scheduler.
+    [[nodiscard]] const Tegra::Control::Scheduler& Scheduler() const;
+
     [[nodiscard]] u64 GetTicks() const;
 
     [[nodiscard]] bool IsAsync() const;
src/video_core/gpu_thread.cpp
@@ -34,13 +34,15 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
 
     CommandDataContainer next;
 
+    scheduler.Init();
+
     while (!stop_token.stop_requested()) {
         state.queue.PopWait(next, stop_token);
         if (stop_token.stop_requested()) {
             break;
         }
-        if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
-            scheduler.Push(submit_list->channel, std::move(submit_list->entries));
+        if (std::holds_alternative<SubmitListCommand>(next.data)) {
+            scheduler.Resume();
         } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
             system.GPU().TickWork();
         } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
@@ -67,14 +69,16 @@ ThreadManager::~ThreadManager() = default;
 
 void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
                                 Core::Frontend::GraphicsContext& context,
-                                Tegra::Control::Scheduler& scheduler) {
+                                Tegra::Control::Scheduler& scheduler_) {
     rasterizer = renderer.ReadRasterizer();
+    scheduler = &scheduler_;
     thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
-                          std::ref(scheduler), std::ref(state));
+                          std::ref(scheduler_), std::ref(state));
 }
 
 void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
-    PushCommand(SubmitListCommand(channel, std::move(entries)));
+    scheduler->Push(channel, std::move(entries));
+    PushCommand(SubmitListCommand());
 }
 
 void ThreadManager::FlushRegion(DAddr addr, u64 size) {
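SubmitList() no longer copies the command list into the GPU-thread queue: it hands the entries straight to the scheduler and pushes an empty SubmitListCommand purely as a wake-up token, which is why the struct loses its members in gpu_thread.h below. A minimal sketch of that wake-token pattern, using a condition variable in place of yuzu's bounded queue (names illustrative):

    #include <condition_variable>
    #include <cstdio>
    #include <deque>
    #include <mutex>
    #include <string>
    #include <thread>

    std::mutex m;
    std::condition_variable cv;
    int wake_tokens = 0;                  // stands in for SubmitListCommand{}
    std::deque<std::string> channel_work; // stands in for the scheduler's fifo

    int main() {
        std::thread gpu_thread([] { // RunThread: wait for a token, then Resume()
            for (int handled = 0; handled < 2; ++handled) {
                std::unique_lock lk(m);
                cv.wait(lk, [] { return wake_tokens > 0; });
                --wake_tokens;
                std::printf("Resume: dispatch '%s'\n", channel_work.front().c_str());
                channel_work.pop_front();
            }
        });
        for (const char* list : {"list A", "list B"}) { // the SubmitList() side
            std::scoped_lock lk(m);
            channel_work.emplace_back(list); // scheduler->Push(channel, entries)
            ++wake_tokens;                   // PushCommand(SubmitListCommand())
            cv.notify_one();
        }
        gpu_thread.join();
    }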
src/video_core/gpu_thread.h
@@ -36,13 +36,7 @@ class RendererBase;
 namespace VideoCommon::GPUThread {
 
 /// Command to signal to the GPU thread that a command list is ready for processing
-struct SubmitListCommand final {
-    explicit SubmitListCommand(s32 channel_, Tegra::CommandList&& entries_)
-        : channel{channel_}, entries{std::move(entries_)} {}
-
-    s32 channel;
-    Tegra::CommandList entries;
-};
+struct SubmitListCommand final {};
 
 /// Command to signal to the GPU thread to flush a region
 struct FlushRegionCommand final {
@@ -124,6 +118,7 @@ public:
 private:
     /// Pushes a command to be executed by the GPU thread
     u64 PushCommand(CommandData&& command_data, bool block = false);
+    Tegra::Control::Scheduler* scheduler;
 
     Core::System& system;
     const bool is_async;
src/video_core/texture_cache/image_info.cpp
@@ -42,7 +42,6 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
         };
     }
     rescaleable = false;
-    is_sparse = config.is_sparse != 0;
     tile_width_spacing = config.tile_width_spacing;
     if (config.texture_type != TextureType::Texture2D &&
         config.texture_type != TextureType::Texture2DNoMipmap) {
src/video_core/texture_cache/image_info.h
@@ -41,7 +41,6 @@ struct ImageInfo {
     bool downscaleable = false;
     bool forced_flushed = false;
     bool dma_downloaded = false;
-    bool is_sparse = false;
 };
 
 } // namespace VideoCommon
src/video_core/texture_cache/texture_cache.h
@@ -600,17 +600,17 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
                   [&](ImageId id, Image&) { deleted_images.push_back(id); });
     for (const ImageId id : deleted_images) {
         Image& image = slot_images[id];
+        if (False(image.flags & ImageFlagBits::CpuModified)) {
+            image.flags |= ImageFlagBits::CpuModified;
+            if (True(image.flags & ImageFlagBits::Tracked)) {
+                UntrackImage(image, id);
+            }
+        }
-        if (True(image.flags & ImageFlagBits::CpuModified)) {
-            continue;
-        }
-
-        image.flags |= ImageFlagBits::CpuModified;
         if (True(image.flags & ImageFlagBits::Remapped)) {
             continue;
         }
         image.flags |= ImageFlagBits::Remapped;
-        if (True(image.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(image, id);
-        }
     }
 }
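The reordered flag handling marks an unmapped image CpuModified (untracking it if needed) before the Remapped early-out, instead of skipping already-modified images entirely. The True()/False() helpers used here are readability wrappers over flag tests; a small illustration of the idiom with a hypothetical flag type (not yuzu's actual ImageFlagBits machinery):

    #include <cstdio>

    enum class ImageFlagBits : unsigned {
        CpuModified = 1u << 0,
        Tracked = 1u << 1,
        Remapped = 1u << 2,
    };

    constexpr ImageFlagBits operator&(ImageFlagBits a, ImageFlagBits b) {
        return static_cast<ImageFlagBits>(static_cast<unsigned>(a) &
                                          static_cast<unsigned>(b));
    }
    constexpr ImageFlagBits& operator|=(ImageFlagBits& a, ImageFlagBits b) {
        a = static_cast<ImageFlagBits>(static_cast<unsigned>(a) |
                                       static_cast<unsigned>(b));
        return a;
    }
    // Helpers that make flag tests read as prose, as in the hunk above.
    constexpr bool True(ImageFlagBits f) { return static_cast<unsigned>(f) != 0; }
    constexpr bool False(ImageFlagBits f) { return static_cast<unsigned>(f) == 0; }

    int main() {
        ImageFlagBits flags = ImageFlagBits::Tracked;
        if (False(flags & ImageFlagBits::CpuModified)) {
            flags |= ImageFlagBits::CpuModified; // mark guest memory dirty
            if (True(flags & ImageFlagBits::Tracked)) {
                std::puts("untrack image"); // UntrackImage(image, id)
            }
        }
        if (True(flags & ImageFlagBits::Remapped)) {
            return 0; // already handled by a previous unmap
        }
        flags |= ImageFlagBits::Remapped;
        std::puts("image marked remapped");
    }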
@@ -1469,8 +1469,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
     const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
     Image& new_image = slot_images[new_image_id];
 
-    if (!gpu_memory->IsContinuousRange(new_image.gpu_addr, new_image.guest_size_bytes) &&
-        new_info.is_sparse) {
+    if (!gpu_memory->IsContinuousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
         new_image.flags |= ImageFlagBits::Sparse;
     }