early-access version 1259
This commit is contained in:
parent
c7d8d0947d
commit
db53458619
12 changed files with 239 additions and 111 deletions
|
@ -1,7 +1,7 @@
|
||||||
yuzu emulator early access
|
yuzu emulator early access
|
||||||
=============
|
=============
|
||||||
|
|
||||||
This is the source code for early-access 1258.
|
This is the source code for early-access 1259.
|
||||||
|
|
||||||
## Legal Notice
|
## Legal Notice
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <unordered_map>
|
#include <unordered_set>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
|
@ -35,6 +35,7 @@
|
||||||
#include "core/hle/kernel/physical_core.h"
|
#include "core/hle/kernel/physical_core.h"
|
||||||
#include "core/hle/kernel/process.h"
|
#include "core/hle/kernel/process.h"
|
||||||
#include "core/hle/kernel/resource_limit.h"
|
#include "core/hle/kernel/resource_limit.h"
|
||||||
|
#include "core/hle/kernel/service_thread.h"
|
||||||
#include "core/hle/kernel/shared_memory.h"
|
#include "core/hle/kernel/shared_memory.h"
|
||||||
#include "core/hle/kernel/synchronization.h"
|
#include "core/hle/kernel/synchronization.h"
|
||||||
#include "core/hle/kernel/thread.h"
|
#include "core/hle/kernel/thread.h"
|
||||||
|
@ -107,6 +108,9 @@ struct KernelCore::Impl {
|
||||||
std::fill(register_host_thread_keys.begin(), register_host_thread_keys.end(),
|
std::fill(register_host_thread_keys.begin(), register_host_thread_keys.end(),
|
||||||
std::thread::id{});
|
std::thread::id{});
|
||||||
std::fill(register_host_thread_values.begin(), register_host_thread_values.end(), 0);
|
std::fill(register_host_thread_values.begin(), register_host_thread_values.end(), 0);
|
||||||
|
|
||||||
|
// Ensures all service threads shut down gracefully
|
||||||
|
service_threads.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void InitializePhysicalCores() {
|
void InitializePhysicalCores() {
|
||||||
|
@ -345,6 +349,9 @@ struct KernelCore::Impl {
|
||||||
std::shared_ptr<Kernel::SharedMemory> irs_shared_mem;
|
std::shared_ptr<Kernel::SharedMemory> irs_shared_mem;
|
||||||
std::shared_ptr<Kernel::SharedMemory> time_shared_mem;
|
std::shared_ptr<Kernel::SharedMemory> time_shared_mem;
|
||||||
|
|
||||||
|
// Threads used for services
|
||||||
|
std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads;
|
||||||
|
|
||||||
std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{};
|
std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{};
|
||||||
std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{};
|
std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{};
|
||||||
std::array<std::unique_ptr<Kernel::KScheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{};
|
std::array<std::unique_ptr<Kernel::KScheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{};
|
||||||
|
@ -639,4 +646,16 @@ void KernelCore::ExitSVCProfile() {
|
||||||
MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]);
|
MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::weak_ptr<Kernel::ServiceThread> KernelCore::CreateServiceThread(const std::string& name) {
|
||||||
|
auto service_thread = std::make_shared<Kernel::ServiceThread>(*this, 1, name);
|
||||||
|
impl->service_threads.emplace(service_thread);
|
||||||
|
return service_thread;
|
||||||
|
}
|
||||||
|
|
||||||
|
void KernelCore::ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread) {
|
||||||
|
if (auto strong_ptr = service_thread.lock()) {
|
||||||
|
impl->service_threads.erase(strong_ptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Kernel
|
} // namespace Kernel
|
||||||
|
|
|
@ -42,6 +42,7 @@ class Process;
|
||||||
class ResourceLimit;
|
class ResourceLimit;
|
||||||
class KScheduler;
|
class KScheduler;
|
||||||
class SharedMemory;
|
class SharedMemory;
|
||||||
|
class ServiceThread;
|
||||||
class Synchronization;
|
class Synchronization;
|
||||||
class Thread;
|
class Thread;
|
||||||
class TimeManager;
|
class TimeManager;
|
||||||
|
@ -227,6 +228,22 @@ public:
|
||||||
|
|
||||||
void ExitSVCProfile();
|
void ExitSVCProfile();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an HLE service thread, which is used to execute service routines asynchronously.
|
||||||
|
* While these are allocated per ServerSession, these need to be owned and managed outside of
|
||||||
|
* ServerSession to avoid a circular dependency.
|
||||||
|
* @param name String name for the ServerSession creating this thread, used for debug purposes.
|
||||||
|
* @returns A weak pointer to the newly created service thread.
|
||||||
|
*/
|
||||||
|
std::weak_ptr<Kernel::ServiceThread> CreateServiceThread(const std::string& name);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Releases an HLE service thread, instructing KernelCore to free it. This should be called when
|
||||||
|
* the ServerSession associated with the thread is destroyed.
|
||||||
|
* @param service_thread Service thread to release.
|
||||||
|
*/
|
||||||
|
void ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class Object;
|
friend class Object;
|
||||||
friend class Process;
|
friend class Process;
|
||||||
|
|
|
@ -25,7 +25,10 @@
|
||||||
namespace Kernel {
|
namespace Kernel {
|
||||||
|
|
||||||
ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {}
|
ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {}
|
||||||
ServerSession::~ServerSession() = default;
|
|
||||||
|
ServerSession::~ServerSession() {
|
||||||
|
kernel.ReleaseServiceThread(service_thread);
|
||||||
|
}
|
||||||
|
|
||||||
ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel,
|
ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel,
|
||||||
std::shared_ptr<Session> parent,
|
std::shared_ptr<Session> parent,
|
||||||
|
@ -34,7 +37,7 @@ ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kern
|
||||||
|
|
||||||
session->name = std::move(name);
|
session->name = std::move(name);
|
||||||
session->parent = std::move(parent);
|
session->parent = std::move(parent);
|
||||||
session->service_thread = std::make_unique<ServiceThread>(kernel, 1);
|
session->service_thread = kernel.CreateServiceThread(session->name);
|
||||||
|
|
||||||
return MakeResult(std::move(session));
|
return MakeResult(std::move(session));
|
||||||
}
|
}
|
||||||
|
@ -139,7 +142,11 @@ ResultCode ServerSession::QueueSyncRequest(std::shared_ptr<Thread> thread,
|
||||||
std::make_shared<HLERequestContext>(kernel, memory, SharedFrom(this), std::move(thread));
|
std::make_shared<HLERequestContext>(kernel, memory, SharedFrom(this), std::move(thread));
|
||||||
|
|
||||||
context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf);
|
context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf);
|
||||||
service_thread->QueueSyncRequest(*this, std::move(context));
|
|
||||||
|
if (auto strong_ptr = service_thread.lock()) {
|
||||||
|
strong_ptr->QueueSyncRequest(*this, std::move(context));
|
||||||
|
return RESULT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
return RESULT_SUCCESS;
|
return RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
|
@ -167,7 +167,7 @@ private:
|
||||||
std::string name;
|
std::string name;
|
||||||
|
|
||||||
/// Thread to dispatch service requests
|
/// Thread to dispatch service requests
|
||||||
std::unique_ptr<ServiceThread> service_thread;
|
std::weak_ptr<ServiceThread> service_thread;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Kernel
|
} // namespace Kernel
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/scope_exit.h"
|
#include "common/scope_exit.h"
|
||||||
|
#include "common/thread.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/hle/kernel/kernel.h"
|
#include "core/hle/kernel/kernel.h"
|
||||||
#include "core/hle/kernel/server_session.h"
|
#include "core/hle/kernel/server_session.h"
|
||||||
|
@ -22,7 +23,7 @@ namespace Kernel {
|
||||||
|
|
||||||
class ServiceThread::Impl final {
|
class ServiceThread::Impl final {
|
||||||
public:
|
public:
|
||||||
explicit Impl(KernelCore& kernel, std::size_t num_threads);
|
explicit Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name);
|
||||||
~Impl();
|
~Impl();
|
||||||
|
|
||||||
void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context);
|
void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context);
|
||||||
|
@ -32,12 +33,16 @@ private:
|
||||||
std::queue<std::function<void()>> requests;
|
std::queue<std::function<void()>> requests;
|
||||||
std::mutex queue_mutex;
|
std::mutex queue_mutex;
|
||||||
std::condition_variable condition;
|
std::condition_variable condition;
|
||||||
|
const std::string service_name;
|
||||||
bool stop{};
|
bool stop{};
|
||||||
};
|
};
|
||||||
|
|
||||||
ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads) {
|
ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name)
|
||||||
|
: service_name{name} {
|
||||||
for (std::size_t i = 0; i < num_threads; ++i)
|
for (std::size_t i = 0; i < num_threads; ++i)
|
||||||
threads.emplace_back([&] {
|
threads.emplace_back([this, &kernel] {
|
||||||
|
Common::SetCurrentThreadName(std::string{"Hle_" + service_name}.c_str());
|
||||||
|
|
||||||
// Wait for first request before trying to acquire a render context
|
// Wait for first request before trying to acquire a render context
|
||||||
{
|
{
|
||||||
std::unique_lock lock{queue_mutex};
|
std::unique_lock lock{queue_mutex};
|
||||||
|
@ -52,7 +57,7 @@ ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads) {
|
||||||
{
|
{
|
||||||
std::unique_lock lock{queue_mutex};
|
std::unique_lock lock{queue_mutex};
|
||||||
condition.wait(lock, [this] { return stop || !requests.empty(); });
|
condition.wait(lock, [this] { return stop || !requests.empty(); });
|
||||||
if (stop && requests.empty()) {
|
if (stop || requests.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
task = std::move(requests.front());
|
task = std::move(requests.front());
|
||||||
|
@ -68,9 +73,14 @@ void ServiceThread::Impl::QueueSyncRequest(ServerSession& session,
|
||||||
std::shared_ptr<HLERequestContext>&& context) {
|
std::shared_ptr<HLERequestContext>&& context) {
|
||||||
{
|
{
|
||||||
std::unique_lock lock{queue_mutex};
|
std::unique_lock lock{queue_mutex};
|
||||||
requests.emplace([session{SharedFrom(&session)}, context{std::move(context)}]() {
|
|
||||||
session->CompleteSyncRequest(*context);
|
// ServerSession owns the service thread, so we cannot capture a strong pointer here in the
|
||||||
return;
|
// event that the ServerSession is terminated.
|
||||||
|
std::weak_ptr<ServerSession> weak_ptr{SharedFrom(&session)};
|
||||||
|
requests.emplace([weak_ptr, context{std::move(context)}]() {
|
||||||
|
if (auto strong_ptr = weak_ptr.lock()) {
|
||||||
|
strong_ptr->CompleteSyncRequest(*context);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
condition.notify_one();
|
condition.notify_one();
|
||||||
|
@ -87,8 +97,8 @@ ServiceThread::Impl::~Impl() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ServiceThread::ServiceThread(KernelCore& kernel, std::size_t num_threads)
|
ServiceThread::ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name)
|
||||||
: impl{std::make_unique<Impl>(kernel, num_threads)} {}
|
: impl{std::make_unique<Impl>(kernel, num_threads, name)} {}
|
||||||
|
|
||||||
ServiceThread::~ServiceThread() = default;
|
ServiceThread::~ServiceThread() = default;
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
namespace Kernel {
|
namespace Kernel {
|
||||||
|
|
||||||
|
@ -14,7 +15,7 @@ class ServerSession;
|
||||||
|
|
||||||
class ServiceThread final {
|
class ServiceThread final {
|
||||||
public:
|
public:
|
||||||
explicit ServiceThread(KernelCore& kernel, std::size_t num_threads);
|
explicit ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name);
|
||||||
~ServiceThread();
|
~ServiceThread();
|
||||||
|
|
||||||
void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context);
|
void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context);
|
||||||
|
|
|
@ -225,6 +225,8 @@ add_library(video_core STATIC
|
||||||
shader/transform_feedback.h
|
shader/transform_feedback.h
|
||||||
surface.cpp
|
surface.cpp
|
||||||
surface.h
|
surface.h
|
||||||
|
texture_cache/accelerated_swizzle.cpp
|
||||||
|
texture_cache/accelerated_swizzle.h
|
||||||
texture_cache/decode_bc4.cpp
|
texture_cache/decode_bc4.cpp
|
||||||
texture_cache/decode_bc4.h
|
texture_cache/decode_bc4.h
|
||||||
texture_cache/descriptor_table.h
|
texture_cache/descriptor_table.h
|
||||||
|
|
|
@ -3,10 +3,11 @@
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include <bit>
|
#include <bit>
|
||||||
|
#include <span>
|
||||||
|
#include <string_view>
|
||||||
|
|
||||||
#include <glad/glad.h>
|
#include <glad/glad.h>
|
||||||
|
|
||||||
#include "common/alignment.h"
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/div_ceil.h"
|
#include "common/div_ceil.h"
|
||||||
|
@ -19,6 +20,7 @@
|
||||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||||
#include "video_core/renderer_opengl/util_shaders.h"
|
#include "video_core/renderer_opengl/util_shaders.h"
|
||||||
#include "video_core/surface.h"
|
#include "video_core/surface.h"
|
||||||
|
#include "video_core/texture_cache/accelerated_swizzle.h"
|
||||||
#include "video_core/texture_cache/types.h"
|
#include "video_core/texture_cache/types.h"
|
||||||
#include "video_core/texture_cache/util.h"
|
#include "video_core/texture_cache/util.h"
|
||||||
#include "video_core/textures/decoders.h"
|
#include "video_core/textures/decoders.h"
|
||||||
|
@ -27,14 +29,12 @@ namespace OpenGL {
|
||||||
|
|
||||||
using namespace HostShaders;
|
using namespace HostShaders;
|
||||||
|
|
||||||
using Tegra::Texture::GOB_SIZE_SHIFT;
|
|
||||||
using Tegra::Texture::GOB_SIZE_X;
|
|
||||||
using Tegra::Texture::GOB_SIZE_X_SHIFT;
|
|
||||||
using Tegra::Texture::GOB_SIZE_Y_SHIFT;
|
|
||||||
using VideoCommon::Extent3D;
|
using VideoCommon::Extent3D;
|
||||||
using VideoCommon::ImageCopy;
|
using VideoCommon::ImageCopy;
|
||||||
using VideoCommon::ImageType;
|
using VideoCommon::ImageType;
|
||||||
using VideoCommon::SwizzleParameters;
|
using VideoCommon::SwizzleParameters;
|
||||||
|
using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams;
|
||||||
|
using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
|
||||||
using VideoCore::Surface::BytesPerBlock;
|
using VideoCore::Surface::BytesPerBlock;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -69,50 +69,32 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
|
||||||
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
|
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
|
||||||
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
||||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||||
static constexpr GLuint LOC_ORIGIN = 0;
|
|
||||||
static constexpr GLuint LOC_DESTINATION = 1;
|
|
||||||
static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
|
|
||||||
static constexpr GLuint LOC_LAYER_STRIDE = 3;
|
|
||||||
static constexpr GLuint LOC_BLOCK_SIZE = 4;
|
|
||||||
static constexpr GLuint LOC_X_SHIFT = 5;
|
|
||||||
static constexpr GLuint LOC_BLOCK_HEIGHT = 6;
|
|
||||||
static constexpr GLuint LOC_BLOCK_HEIGHT_MASK = 7;
|
|
||||||
|
|
||||||
const u32 bytes_per_block = BytesPerBlock(image.info.format);
|
|
||||||
const u32 bytes_per_block_log2 = std::countr_zero(bytes_per_block);
|
|
||||||
|
|
||||||
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
|
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
|
||||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||||
glUniform3ui(LOC_ORIGIN, 0, 0, 0); // TODO
|
|
||||||
glUniform3i(LOC_DESTINATION, 0, 0, 0); // TODO
|
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
|
||||||
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block_log2);
|
|
||||||
glUniform1ui(LOC_LAYER_STRIDE, image.info.layer_stride);
|
|
||||||
for (const SwizzleParameters& swizzle : swizzles) {
|
for (const SwizzleParameters& swizzle : swizzles) {
|
||||||
const Extent3D block = swizzle.block;
|
|
||||||
const Extent3D num_tiles = swizzle.num_tiles;
|
const Extent3D num_tiles = swizzle.num_tiles;
|
||||||
const size_t offset = swizzle.buffer_offset + buffer_offset;
|
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
|
||||||
|
|
||||||
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
||||||
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
||||||
|
|
||||||
const u32 stride_alignment = CalculateLevelStrideAlignment(image.info, swizzle.level);
|
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
|
||||||
const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
|
glUniform3uiv(0, 1, params.origin.data());
|
||||||
|
glUniform3iv(1, 1, params.destination.data());
|
||||||
const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
|
glUniform1ui(2, params.bytes_per_block_log2);
|
||||||
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
|
glUniform1ui(3, params.layer_stride);
|
||||||
|
glUniform1ui(4, params.block_size);
|
||||||
const u32 block_height_mask = (1U << block.height) - 1;
|
glUniform1ui(5, params.x_shift);
|
||||||
const u32 x_shift = GOB_SIZE_SHIFT + block.height + block.depth;
|
glUniform1ui(6, params.block_height);
|
||||||
|
glUniform1ui(7, params.block_height_mask);
|
||||||
glUniform1ui(LOC_BLOCK_SIZE, block_size);
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||||
glUniform1ui(LOC_X_SHIFT, x_shift);
|
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||||
glUniform1ui(LOC_BLOCK_HEIGHT, block.height);
|
|
||||||
glUniform1ui(LOC_BLOCK_HEIGHT_MASK, block_height_mask);
|
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset,
|
|
||||||
image.guest_size_bytes - swizzle.buffer_offset);
|
|
||||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
|
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
|
||||||
GL_WRITE_ONLY, StoreFormat(bytes_per_block));
|
GL_WRITE_ONLY, store_format);
|
||||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
|
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
|
||||||
}
|
}
|
||||||
program_manager.RestoreGuestCompute();
|
program_manager.RestoreGuestCompute();
|
||||||
|
@ -126,60 +108,35 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
|
||||||
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
||||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||||
|
|
||||||
static constexpr GLuint LOC_ORIGIN = 0;
|
|
||||||
static constexpr GLuint LOC_DESTINATION = 1;
|
|
||||||
static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
|
|
||||||
static constexpr GLuint SLICE_SIZE_LOC = 3;
|
|
||||||
static constexpr GLuint LOC_BLOCK_SIZE = 4;
|
|
||||||
static constexpr GLuint LOC_X_SHIFT = 5;
|
|
||||||
static constexpr GLuint LOC_BLOCK_HEIGHT = 6;
|
|
||||||
static constexpr GLuint LOC_BLOCK_HEIGHT_MASK = 7;
|
|
||||||
static constexpr GLuint BLOCK_DEPTH_LOC = 8;
|
|
||||||
static constexpr GLuint BLOCK_DEPTH_MASK_LOC = 9;
|
|
||||||
|
|
||||||
const u32 bytes_per_block = BytesPerBlock(image.info.format);
|
|
||||||
const u32 bytes_per_block_log2 = std::countr_zero(bytes_per_block);
|
|
||||||
|
|
||||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||||
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
|
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||||
glUniform3ui(LOC_ORIGIN, 0, 0, 0); // TODO
|
|
||||||
glUniform3i(LOC_DESTINATION, 0, 0, 0); // TODO
|
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
|
||||||
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block_log2);
|
|
||||||
for (const SwizzleParameters& swizzle : swizzles) {
|
for (const SwizzleParameters& swizzle : swizzles) {
|
||||||
const Extent3D block = swizzle.block;
|
const Extent3D block = swizzle.block;
|
||||||
const Extent3D num_tiles = swizzle.num_tiles;
|
const Extent3D num_tiles = swizzle.num_tiles;
|
||||||
const size_t offset = swizzle.buffer_offset + buffer_offset;
|
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
|
||||||
|
|
||||||
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
||||||
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
||||||
const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth);
|
const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth);
|
||||||
|
|
||||||
const u32 stride_alignment = CalculateLevelStrideAlignment(image.info, swizzle.level);
|
const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info);
|
||||||
const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
|
glUniform3uiv(0, 1, params.origin.data());
|
||||||
|
glUniform3iv(1, 1, params.destination.data());
|
||||||
const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
|
glUniform1ui(2, params.bytes_per_block_log2);
|
||||||
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
|
glUniform1ui(3, params.slice_size);
|
||||||
const u32 slice_size =
|
glUniform1ui(4, params.block_size);
|
||||||
Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size;
|
glUniform1ui(5, params.x_shift);
|
||||||
|
glUniform1ui(6, params.block_height);
|
||||||
const u32 block_height_mask = (1U << block.height) - 1;
|
glUniform1ui(7, params.block_height_mask);
|
||||||
const u32 block_depth_mask = (1U << block.depth) - 1;
|
glUniform1ui(8, params.block_depth);
|
||||||
const u32 x_shift = GOB_SIZE_SHIFT + block.height + block.depth;
|
glUniform1ui(9, params.block_depth_mask);
|
||||||
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||||
glUniform1ui(SLICE_SIZE_LOC, slice_size);
|
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||||
glUniform1ui(LOC_BLOCK_SIZE, block_size);
|
|
||||||
glUniform1ui(LOC_X_SHIFT, x_shift);
|
|
||||||
glUniform1ui(LOC_BLOCK_HEIGHT, block.height);
|
|
||||||
glUniform1ui(LOC_BLOCK_HEIGHT_MASK, block_height_mask);
|
|
||||||
glUniform1ui(BLOCK_DEPTH_LOC, block.depth);
|
|
||||||
glUniform1ui(BLOCK_DEPTH_MASK_LOC, block_depth_mask);
|
|
||||||
|
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset,
|
|
||||||
image.guest_size_bytes - swizzle.buffer_offset);
|
|
||||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
|
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
|
||||||
GL_WRITE_ONLY, StoreFormat(bytes_per_block));
|
GL_WRITE_ONLY, store_format);
|
||||||
|
|
||||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
|
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
|
||||||
}
|
}
|
||||||
program_manager.RestoreGuestCompute();
|
program_manager.RestoreGuestCompute();
|
||||||
|
@ -204,22 +161,20 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
|
||||||
|
|
||||||
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
|
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
|
||||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||||
glUniform2ui(LOC_ORIGIN, 0, 0); // TODO
|
glUniform2ui(LOC_ORIGIN, 0, 0);
|
||||||
glUniform2i(LOC_DESTINATION, 0, 0); // TODO
|
glUniform2i(LOC_DESTINATION, 0, 0);
|
||||||
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
|
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
|
||||||
glUniform1ui(LOC_PITCH, pitch);
|
glUniform1ui(LOC_PITCH, pitch);
|
||||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
|
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
|
||||||
for (const SwizzleParameters& swizzle : swizzles) {
|
for (const SwizzleParameters& swizzle : swizzles) {
|
||||||
const Extent3D num_tiles = swizzle.num_tiles;
|
const Extent3D num_tiles = swizzle.num_tiles;
|
||||||
const size_t offset = swizzle.buffer_offset + buffer_offset;
|
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
|
||||||
|
|
||||||
const u32 aligned_width = Common::AlignUp(num_tiles.width, WORKGROUP_SIZE.width);
|
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
||||||
const u32 aligned_height = Common::AlignUp(num_tiles.height, WORKGROUP_SIZE.height);
|
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
||||||
const u32 num_dispatches_x = aligned_width / WORKGROUP_SIZE.width;
|
|
||||||
const u32 num_dispatches_y = aligned_height / WORKGROUP_SIZE.height;
|
|
||||||
|
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), offset,
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||||
image.guest_size_bytes - swizzle.buffer_offset);
|
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
|
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
|
||||||
}
|
}
|
||||||
program_manager.RestoreGuestCompute();
|
program_manager.RestoreGuestCompute();
|
||||||
|
|
70
src/video_core/texture_cache/accelerated_swizzle.cpp
Executable file
70
src/video_core/texture_cache/accelerated_swizzle.cpp
Executable file
|
@ -0,0 +1,70 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <bit>
|
||||||
|
|
||||||
|
#include "common/alignment.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "common/div_ceil.h"
|
||||||
|
#include "video_core/surface.h"
|
||||||
|
#include "video_core/texture_cache/accelerated_swizzle.h"
|
||||||
|
#include "video_core/texture_cache/util.h"
|
||||||
|
#include "video_core/textures/decoders.h"
|
||||||
|
|
||||||
|
namespace VideoCommon::Accelerated {
|
||||||
|
|
||||||
|
using Tegra::Texture::GOB_SIZE_SHIFT;
|
||||||
|
using Tegra::Texture::GOB_SIZE_X;
|
||||||
|
using Tegra::Texture::GOB_SIZE_X_SHIFT;
|
||||||
|
using Tegra::Texture::GOB_SIZE_Y_SHIFT;
|
||||||
|
using VideoCore::Surface::BytesPerBlock;
|
||||||
|
|
||||||
|
BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle,
|
||||||
|
const ImageInfo& info) {
|
||||||
|
const Extent3D block = swizzle.block;
|
||||||
|
const Extent3D num_tiles = swizzle.num_tiles;
|
||||||
|
const u32 bytes_per_block = BytesPerBlock(info.format);
|
||||||
|
const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
|
||||||
|
const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
|
||||||
|
const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
|
||||||
|
return BlockLinearSwizzle2DParams{
|
||||||
|
.origin{0, 0, 0},
|
||||||
|
.destination{0, 0, 0},
|
||||||
|
.bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
|
||||||
|
.layer_stride = info.layer_stride,
|
||||||
|
.block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth),
|
||||||
|
.x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
|
||||||
|
.block_height = block.height,
|
||||||
|
.block_height_mask = (1U << block.height) - 1,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle,
|
||||||
|
const ImageInfo& info) {
|
||||||
|
const Extent3D block = swizzle.block;
|
||||||
|
const Extent3D num_tiles = swizzle.num_tiles;
|
||||||
|
const u32 bytes_per_block = BytesPerBlock(info.format);
|
||||||
|
const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
|
||||||
|
const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
|
||||||
|
|
||||||
|
const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
|
||||||
|
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
|
||||||
|
const u32 slice_size =
|
||||||
|
Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size;
|
||||||
|
return BlockLinearSwizzle3DParams{
|
||||||
|
.origin{0, 0, 0},
|
||||||
|
.destination{0, 0, 0},
|
||||||
|
.bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
|
||||||
|
.slice_size = slice_size,
|
||||||
|
.block_size = block_size,
|
||||||
|
.x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
|
||||||
|
.block_height = block.height,
|
||||||
|
.block_height_mask = (1U << block.height) - 1,
|
||||||
|
.block_depth = block.depth,
|
||||||
|
.block_depth_mask = (1U << block.depth) - 1,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace VideoCommon::Accelerated
|
45
src/video_core/texture_cache/accelerated_swizzle.h
Executable file
45
src/video_core/texture_cache/accelerated_swizzle.h
Executable file
|
@ -0,0 +1,45 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/texture_cache/image_info.h"
|
||||||
|
#include "video_core/texture_cache/types.h"
|
||||||
|
|
||||||
|
namespace VideoCommon::Accelerated {
|
||||||
|
|
||||||
|
struct BlockLinearSwizzle2DParams {
|
||||||
|
std::array<u32, 3> origin;
|
||||||
|
std::array<s32, 3> destination;
|
||||||
|
u32 bytes_per_block_log2;
|
||||||
|
u32 layer_stride;
|
||||||
|
u32 block_size;
|
||||||
|
u32 x_shift;
|
||||||
|
u32 block_height;
|
||||||
|
u32 block_height_mask;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct BlockLinearSwizzle3DParams {
|
||||||
|
std::array<u32, 3> origin;
|
||||||
|
std::array<s32, 3> destination;
|
||||||
|
u32 bytes_per_block_log2;
|
||||||
|
u32 slice_size;
|
||||||
|
u32 block_size;
|
||||||
|
u32 x_shift;
|
||||||
|
u32 block_height;
|
||||||
|
u32 block_height_mask;
|
||||||
|
u32 block_depth;
|
||||||
|
u32 block_depth_mask;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Builds the parameters for the 2D block linear swizzle from a swizzle description and the
/// image it applies to. The result is returned by value and must not be discarded.
[[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(
    const SwizzleParameters& swizzle, const ImageInfo& info);
|
||||||
|
|
||||||
|
/// Builds the parameters for the 3D block linear swizzle from a swizzle description and the
/// image it applies to. The result is returned by value and must not be discarded.
[[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(
    const SwizzleParameters& swizzle, const ImageInfo& info);
|
||||||
|
|
||||||
|
} // namespace VideoCommon::Accelerated
|
|
@ -19,7 +19,6 @@
|
||||||
namespace Tegra::Texture {
|
namespace Tegra::Texture {
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing.
|
* This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing.
|
||||||
* Calculates the offset of an (x, y) position within a swizzled texture.
|
* Calculates the offset of an (x, y) position within a swizzled texture.
|
||||||
|
@ -41,11 +40,15 @@ constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst();
|
||||||
template <bool TO_LINEAR>
|
template <bool TO_LINEAR>
|
||||||
void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
|
void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
|
||||||
u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
|
u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
|
||||||
static constexpr u32 origin_x = 0; // TODO
|
// The origin of the transformation can be configured here, leave it as zero as the current API
|
||||||
static constexpr u32 origin_y = 0; // TODO
|
// doesn't expose it.
|
||||||
static constexpr u32 origin_z = 0; // TODO
|
static constexpr u32 origin_x = 0;
|
||||||
|
static constexpr u32 origin_y = 0;
|
||||||
|
static constexpr u32 origin_z = 0;
|
||||||
|
|
||||||
const u32 pitch = width * bytes_per_pixel; // TODO
|
// We can configure here a custom pitch
|
||||||
|
// As it's not exposed 'width * bpp' will be the expected pitch.
|
||||||
|
const u32 pitch = width * bytes_per_pixel;
|
||||||
const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel;
|
const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel;
|
||||||
|
|
||||||
const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
|
const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
|
||||||
|
@ -86,7 +89,6 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
SwizzleTable MakeSwizzleTable() {
|
SwizzleTable MakeSwizzleTable() {
|
||||||
|
|
Loading…
Reference in a new issue