early-access version 2263

This commit is contained in:
pineappleEA 2021-12-03 04:35:20 +01:00
parent ecc126a6a8
commit a28afca771
18 changed files with 101 additions and 62 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 2261.
This is the source code for early-access 2263.
## Legal Notice

View file

@ -19,16 +19,16 @@ u64 EstimateRDTSCFrequency() {
// get current time
_mm_mfence();
const u64 tscStart = __rdtsc();
const auto startTime = std::chrono::high_resolution_clock::now();
const auto startTime = std::chrono::steady_clock::now();
// wait roughly 3 seconds
while (true) {
auto milli = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - startTime);
std::chrono::steady_clock::now() - startTime);
if (milli.count() >= 3000)
break;
std::this_thread::sleep_for(milli_10);
}
const auto endTime = std::chrono::high_resolution_clock::now();
const auto endTime = std::chrono::steady_clock::now();
_mm_mfence();
const u64 tscEnd = __rdtsc();
// calculate difference

View file

@ -30,6 +30,7 @@
#include "core/hle/service/apm/apm_controller.h"
#include "core/hle/service/apm/apm_interface.h"
#include "core/hle/service/bcat/backend/backend.h"
#include "core/hle/service/caps/caps.h"
#include "core/hle/service/filesystem/filesystem.h"
#include "core/hle/service/ns/ns.h"
#include "core/hle/service/nvflinger/nvflinger.h"
@ -298,7 +299,7 @@ ISelfController::ISelfController(Core::System& system_, NVFlinger::NVFlinger& nv
{91, &ISelfController::GetAccumulatedSuspendedTickChangedEvent, "GetAccumulatedSuspendedTickChangedEvent"},
{100, &ISelfController::SetAlbumImageTakenNotificationEnabled, "SetAlbumImageTakenNotificationEnabled"},
{110, nullptr, "SetApplicationAlbumUserData"},
{120, nullptr, "SaveCurrentScreenshot"},
{120, &ISelfController::SaveCurrentScreenshot, "SaveCurrentScreenshot"},
{130, nullptr, "SetRecordVolumeMuted"},
{1000, nullptr, "GetDebugStorageChannel"},
};
@ -579,6 +580,17 @@ void ISelfController::SetAlbumImageTakenNotificationEnabled(Kernel::HLERequestCo
rb.Push(ResultSuccess);
}
// Stubbed handler for SaveCurrentScreenshot (command 120): reads the album report option from
// the request, logs it, and answers with success without doing anything else.
void ISelfController::SaveCurrentScreenshot(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser request{ctx};
    const auto report_option = request.PopEnum<Capture::AlbumReportOption>();

    LOG_WARNING(Service_AM, "(STUBBED) called. album_report_option={}", report_option);

    // Only a result code is pushed into the response.
    IPC::ResponseBuilder response{ctx, 2};
    response.Push(ResultSuccess);
}
AppletMessageQueue::AppletMessageQueue(Core::System& system)
: service_context{system, "AppletMessageQueue"} {
on_new_message = service_context.CreateEvent("AMMessageQueue:OnMessageReceived");

View file

@ -151,6 +151,7 @@ private:
void GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx);
void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx);
void SetAlbumImageTakenNotificationEnabled(Kernel::HLERequestContext& ctx);
void SaveCurrentScreenshot(Kernel::HLERequestContext& ctx);
enum class ScreenshotPermission : u32 {
Inherit = 0,

View file

@ -96,7 +96,7 @@ private:
bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector<u8>& input,
std::vector<opus_int16>& output, u64* out_performance_time) const {
const auto start_time = std::chrono::high_resolution_clock::now();
const auto start_time = std::chrono::steady_clock::now();
const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
if (sizeof(OpusPacketHeader) > input.size()) {
LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
@ -135,7 +135,7 @@ private:
return false;
}
const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
const auto end_time = std::chrono::steady_clock::now() - start_time;
sample_count = out_sample_count;
consumed = static_cast<u32>(sizeof(OpusPacketHeader) + hdr.size);
if (out_performance_time != nullptr) {

View file

@ -24,7 +24,7 @@ enum class AlbumImageOrientation {
Orientation3 = 3,
};
enum class AlbumReportOption {
enum class AlbumReportOption : s32 {
Disable = 0,
Enable = 1,
};

View file

@ -21,7 +21,7 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>&
case 0x0:
switch (command.cmd) {
case 0x1:
return Submit(fd, input, output);
return Submit(input, output);
case 0x2:
return GetSyncpoint(input, output);
case 0x3:
@ -62,16 +62,11 @@ NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>&
return NvResult::NotImplemented;
}
void nvhost_nvdec::OnOpen(DeviceFD fd) {
static u32 next_id{};
fd_to_id[fd] = next_id++;
}
// No work is performed when an nvhost_nvdec descriptor is opened; `fd` is unused.
void nvhost_nvdec::OnOpen(DeviceFD fd) {}
void nvhost_nvdec::OnClose(DeviceFD fd) {
LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
if (fd_to_id.find(fd) != fd_to_id.end()) {
system.GPU().ClearCdmaInstance(fd_to_id[fd]);
}
system.GPU().ClearCdmaInstance();
}
} // namespace Service::Nvidia::Devices

View file

@ -59,8 +59,7 @@ NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
return NvResult::Success;
}
NvResult nvhost_nvdec_common::Submit(DeviceFD fd, const std::vector<u8>& input,
std::vector<u8>& output) {
NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u8>& output) {
IoctlSubmit params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
@ -94,7 +93,7 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, const std::vector<u8>& input,
Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
system.Memory().ReadBlock(object->addr + cmd_buffer.offset, cmdlist.data(),
cmdlist.size() * sizeof(u32));
gpu.PushCommandBuffer(fd_to_id[fd], cmdlist);
gpu.PushCommandBuffer(cmdlist);
}
std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
// Some games expect command_buffers to be written back

View file

@ -104,14 +104,13 @@ protected:
/// Ioctl command implementations
NvResult SetNVMAPfd(const std::vector<u8>& input);
NvResult Submit(DeviceFD fd, const std::vector<u8>& input, std::vector<u8>& output);
NvResult Submit(const std::vector<u8>& input, std::vector<u8>& output);
NvResult GetSyncpoint(const std::vector<u8>& input, std::vector<u8>& output);
NvResult GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output);
NvResult MapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
NvResult UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output);
NvResult SetSubmitTimeout(const std::vector<u8>& input, std::vector<u8>& output);
std::unordered_map<DeviceFD, u32> fd_to_id{};
s32_le nvmap_fd{};
u32_le submit_timeout{};
std::shared_ptr<nvmap> nvmap_dev;

View file

@ -21,7 +21,7 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& i
case 0x0:
switch (command.cmd) {
case 0x1:
return Submit(fd, input, output);
return Submit(input, output);
case 0x2:
return GetSyncpoint(input, output);
case 0x3:
@ -62,15 +62,10 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& i
return NvResult::NotImplemented;
}
void nvhost_vic::OnOpen(DeviceFD fd) {
static u32 next_id{};
fd_to_id[fd] = next_id++;
}
// No work is performed when an nvhost_vic descriptor is opened; `fd` is unused.
void nvhost_vic::OnOpen(DeviceFD fd) {}
void nvhost_vic::OnClose(DeviceFD fd) {
if (fd_to_id.find(fd) != fd_to_id.end()) {
system.GPU().ClearCdmaInstance(fd_to_id[fd]);
}
system.GPU().ClearCdmaInstance();
}
} // namespace Service::Nvidia::Devices

View file

@ -33,7 +33,7 @@ public:
explicit PerfStats(u64 title_id_);
~PerfStats();
using Clock = std::chrono::high_resolution_clock;
using Clock = std::chrono::steady_clock;
void BeginSystemFrame();
void EndSystemFrame();
@ -87,7 +87,7 @@ private:
class SpeedLimiter {
public:
using Clock = std::chrono::high_resolution_clock;
using Clock = std::chrono::steady_clock;
void DoSpeedLimiting(std::chrono::microseconds current_system_time_us);

View file

@ -23,6 +23,17 @@ namespace Tegra {
namespace {
constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
// Hardware device types considered for GPU decoding. Codec::CreateGpuAvDevice() walks this
// array front to back, so the order of the entries is the order in which they are tried.
// CUDA and Vulkan are always present; the D3D11VA/DXVA2 and VDPAU entries are compiled in
// only on Windows and Linux respectively.
constexpr std::array PREFERRED_GPU_DECODERS = {
AV_HWDEVICE_TYPE_CUDA,
#ifdef _WIN32
AV_HWDEVICE_TYPE_D3D11VA,
AV_HWDEVICE_TYPE_DXVA2,
#elif defined(__linux__)
AV_HWDEVICE_TYPE_VDPAU,
#endif
// last resort for Linux Flatpak (w/ NVIDIA)
AV_HWDEVICE_TYPE_VULKAN,
};
void AVPacketDeleter(AVPacket* ptr) {
av_packet_free(&ptr);
@ -61,6 +72,20 @@ Codec::~Codec() {
av_buffer_unref(&av_gpu_decoder);
}
// Lists the hardware device context types reported by the linked FFmpeg, excluding VA-API
// (which is attempted separately, before this list is consulted).
// The returned vector contains only real device types: the AV_HWDEVICE_TYPE_NONE value that
// marks the end of the iteration is never stored.
static std::vector<AVHWDeviceType> ListSupportedContexts() {
    std::vector<AVHWDeviceType> contexts{};
    AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
    // av_hwdevice_iterate_types() yields AV_HWDEVICE_TYPE_NONE once there are no more types;
    // stop there so the end-of-list sentinel is not recorded as a supported context.
    while ((current_device_type = av_hwdevice_iterate_types(current_device_type)) !=
           AV_HWDEVICE_TYPE_NONE) {
        // filter out VA-API since we will try that first if supported
        if (current_device_type != AV_HWDEVICE_TYPE_VAAPI) {
            contexts.push_back(current_device_type);
        }
    }
    return contexts;
}
#ifdef LIBVA_FOUND
// List all the currently loaded Linux modules
static std::vector<std::string> ListLinuxKernelModules() {
@ -122,16 +147,12 @@ bool Codec::CreateGpuAvDevice() {
av_dict_free(&hwdevice_options);
#endif
static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
static constexpr std::array GPU_DECODER_TYPES{
#ifdef linux
AV_HWDEVICE_TYPE_VDPAU,
#endif
AV_HWDEVICE_TYPE_CUDA,
#ifdef _WIN32
AV_HWDEVICE_TYPE_D3D11VA,
#endif
};
for (const auto& type : GPU_DECODER_TYPES) {
static const auto supported_contexts = ListSupportedContexts();
for (const auto& type : PREFERRED_GPU_DECODERS) {
if (std::none_of(supported_contexts.begin(), supported_contexts.end(),
[&type](const auto& context) { return context == type; })) {
continue;
}
const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
if (hwdevice_res < 0) {
LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",

View file

@ -185,6 +185,16 @@ struct GPU::Impl {
return *dma_pusher;
}
/// Returns a reference to the GPU CDMA pusher.
/// NOTE(review): in the code visible here, cdma_pusher is only created lazily by
/// PushCommandBuffer() and is reset by ClearCdmaInstance(), so this dereference presumes a
/// pusher currently exists - confirm that callers guarantee this.
[[nodiscard]] Tegra::CDmaPusher& CDmaPusher() {
return *cdma_pusher;
}
/// Returns a const reference to the GPU CDMA pusher.
/// NOTE(review): in the code visible here, cdma_pusher is only created lazily by
/// PushCommandBuffer() and is reset by ClearCdmaInstance(), so this dereference presumes a
/// pusher currently exists - confirm that callers guarantee this.
[[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const {
return *cdma_pusher;
}
/// Returns a reference to the underlying renderer.
[[nodiscard]] VideoCore::RendererBase& Renderer() {
return *renderer;
@ -328,26 +338,25 @@ struct GPU::Impl {
}
/// Push GPU command buffer entries to be processed
void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
if (!use_nvdec) {
return;
}
if (cdma_pushers.find(id) == cdma_pushers.end()) {
cdma_pushers[id] = std::make_unique<Tegra::CDmaPusher>(gpu);
if (!cdma_pusher) {
cdma_pusher = std::make_unique<Tegra::CDmaPusher>(gpu);
}
// SubmitCommandBuffer would make the nvdec operations async, this is not currently working
// TODO(ameerj): RE proper async nvdec operation
// gpu_thread.SubmitCommandBuffer(std::move(entries));
cdma_pushers[id]->ProcessEntries(std::move(entries));
cdma_pusher->ProcessEntries(std::move(entries));
}
/// Frees the CDMAPusher instance to free up resources
void ClearCdmaInstance(u32 id) {
if (cdma_pushers.find(id) != cdma_pushers.end()) {
cdma_pushers.erase(id);
}
void ClearCdmaInstance() {
cdma_pusher.reset();
}
/// Swap buffers (render frame)
@ -650,7 +659,7 @@ struct GPU::Impl {
Core::System& system;
std::unique_ptr<Tegra::MemoryManager> memory_manager;
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
std::unique_ptr<VideoCore::RendererBase> renderer;
VideoCore::RasterizerInterface* rasterizer = nullptr;
const bool use_nvdec;
@ -802,6 +811,14 @@ const Tegra::DmaPusher& GPU::DmaPusher() const {
return impl->DmaPusher();
}
// Public accessor; forwards to the implementation object's CDmaPusher().
Tegra::CDmaPusher& GPU::CDmaPusher() {
return impl->CDmaPusher();
}
// Const public accessor; forwards to the implementation object's const CDmaPusher().
const Tegra::CDmaPusher& GPU::CDmaPusher() const {
return impl->CDmaPusher();
}
VideoCore::RendererBase& GPU::Renderer() {
return impl->Renderer();
}
@ -870,12 +887,12 @@ void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
impl->PushGPUEntries(std::move(entries));
}
void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
impl->PushCommandBuffer(id, entries);
void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
impl->PushCommandBuffer(entries);
}
void GPU::ClearCdmaInstance(u32 id) {
impl->ClearCdmaInstance(id);
void GPU::ClearCdmaInstance() {
impl->ClearCdmaInstance();
}
void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {

View file

@ -242,10 +242,10 @@ public:
void PushGPUEntries(Tegra::CommandList&& entries);
/// Push GPU command buffer entries to be processed
void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
void PushCommandBuffer(Tegra::ChCommandHeaderList& entries);
/// Frees the CDMAPusher instance to free up resources
void ClearCdmaInstance(u32 id);
void ClearCdmaInstance();
/// Swap buffers (render frame)
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);

View file

@ -18,7 +18,7 @@ int ShaderNotify::ShadersBuilding() noexcept {
const int now_complete = num_complete.load(std::memory_order::relaxed);
const int now_building = num_building.load(std::memory_order::relaxed);
if (now_complete == now_building) {
const auto now = std::chrono::high_resolution_clock::now();
const auto now = std::chrono::steady_clock::now();
if (completed && num_complete == num_when_completed) {
if (now - complete_time > TIME_TO_STOP_REPORTING) {
report_base = now_complete;

View file

@ -28,6 +28,6 @@ private:
bool completed{};
int num_when_completed{};
std::chrono::high_resolution_clock::time_point complete_time;
std::chrono::steady_clock::time_point complete_time;
};
} // namespace VideoCore

View file

@ -136,7 +136,7 @@ void LoadingScreen::OnLoadComplete() {
void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size_t value,
std::size_t total) {
using namespace std::chrono;
const auto now = high_resolution_clock::now();
const auto now = steady_clock::now();
// reset the timer if the stage changes
if (stage != previous_stage) {
ui->progress_bar->setStyleSheet(QString::fromUtf8(progressbar_style[stage]));
@ -160,7 +160,7 @@ void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size
// If there's a drastic slowdown in the rate, then display an estimate
if (now - previous_time > milliseconds{50} || slow_shader_compile_start) {
if (!slow_shader_compile_start) {
slow_shader_start = high_resolution_clock::now();
slow_shader_start = steady_clock::now();
slow_shader_compile_start = true;
slow_shader_first_value = value;
}

View file

@ -84,8 +84,8 @@ private:
// shaders, it will start quickly but end slow if new shaders were added since previous launch.
// These variables are used to detect the change in speed so we can generate an ETA
bool slow_shader_compile_start = false;
std::chrono::high_resolution_clock::time_point slow_shader_start;
std::chrono::high_resolution_clock::time_point previous_time;
std::chrono::steady_clock::time_point slow_shader_start;
std::chrono::steady_clock::time_point previous_time;
std::size_t slow_shader_first_value = 0;
};