early-access version 3275

This commit is contained in:
pineappleEA 2023-01-03 22:43:57 +01:00
parent 4b11185ae8
commit 0f64bad6bd
26 changed files with 350 additions and 160 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 3274. This is the source code for early-access 3275.
## Legal Notice ## Legal Notice

View file

@ -11,6 +11,11 @@
namespace Core::HID { namespace Core::HID {
constexpr s32 HID_JOYSTICK_MAX = 0x7fff; constexpr s32 HID_JOYSTICK_MAX = 0x7fff;
constexpr s32 HID_TRIGGER_MAX = 0x7fff; constexpr s32 HID_TRIGGER_MAX = 0x7fff;
// Use a common UUID for TAS and Virtual Gamepad
constexpr Common::UUID TAS_UUID =
Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
constexpr Common::UUID VIRTUAL_UUID =
Common::UUID{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
EmulatedController::EmulatedController(NpadIdType npad_id_type_) : npad_id_type(npad_id_type_) {} EmulatedController::EmulatedController(NpadIdType npad_id_type_) : npad_id_type(npad_id_type_) {}
@ -392,10 +397,6 @@ void EmulatedController::ReloadInput() {
nfc_devices[index]->ForceUpdate(); nfc_devices[index]->ForceUpdate();
} }
// Use a common UUID for TAS
static constexpr Common::UUID TAS_UUID = Common::UUID{
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xA5, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
// Register TAS devices. No need to force update // Register TAS devices. No need to force update
for (std::size_t index = 0; index < tas_button_devices.size(); ++index) { for (std::size_t index = 0; index < tas_button_devices.size(); ++index) {
if (!tas_button_devices[index]) { if (!tas_button_devices[index]) {
@ -421,10 +422,6 @@ void EmulatedController::ReloadInput() {
}); });
} }
// Use a common UUID for Virtual Gamepad
static constexpr Common::UUID VIRTUAL_UUID = Common::UUID{
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}};
// Register virtual devices. No need to force update // Register virtual devices. No need to force update
for (std::size_t index = 0; index < virtual_button_devices.size(); ++index) { for (std::size_t index = 0; index < virtual_button_devices.size(); ++index) {
if (!virtual_button_devices[index]) { if (!virtual_button_devices[index]) {
@ -842,7 +839,12 @@ void EmulatedController::SetStick(const Common::Input::CallbackStatus& callback,
// Only read stick values that have the same uuid or are over the threshold to avoid flapping // Only read stick values that have the same uuid or are over the threshold to avoid flapping
if (controller.stick_values[index].uuid != uuid) { if (controller.stick_values[index].uuid != uuid) {
if (!stick_value.down && !stick_value.up && !stick_value.left && !stick_value.right) { const bool is_tas = uuid == TAS_UUID;
if (is_tas && stick_value.x.value == 0 && stick_value.y.value == 0) {
return;
}
if (!is_tas && !stick_value.down && !stick_value.up && !stick_value.left &&
!stick_value.right) {
return; return;
} }
} }

View file

@ -74,8 +74,6 @@ void IRS::DeactivateIrsensor(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_IRS, "(STUBBED) called, applet_resource_user_id={}", LOG_WARNING(Service_IRS, "(STUBBED) called, applet_resource_user_id={}",
applet_resource_user_id); applet_resource_user_id);
npad_device->SetPollingMode(Common::Input::PollingMode::Active);
IPC::ResponseBuilder rb{ctx, 2}; IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ResultSuccess); rb.Push(ResultSuccess);
} }
@ -514,7 +512,7 @@ void IRS::StopImageProcessorAsync(Kernel::HLERequestContext& ctx) {
auto result = IsIrCameraHandleValid(parameters.camera_handle); auto result = IsIrCameraHandleValid(parameters.camera_handle);
if (result.IsSuccess()) { if (result.IsSuccess()) {
// TODO: Stop image processor async // TODO: Stop image processor async
npad_device->SetPollingMode(Common::Input::PollingMode::IR); npad_device->SetPollingMode(Common::Input::PollingMode::Active);
result = ResultSuccess; result = ResultSuccess;
} }

View file

@ -264,6 +264,16 @@ DriverResult JoyconDriver::SetPollingMode() {
irs_protocol->DisableIrs(); irs_protocol->DisableIrs();
} }
if (nfc_protocol->IsEnabled()) {
amiibo_detected = false;
nfc_protocol->DisableNfc();
}
if (ring_protocol->IsEnabled()) {
ring_connected = false;
ring_protocol->DisableRingCon();
}
if (irs_enabled && supported_features.irs) { if (irs_enabled && supported_features.irs) {
auto result = irs_protocol->EnableIrs(); auto result = irs_protocol->EnableIrs();
if (result == DriverResult::Success) { if (result == DriverResult::Success) {
@ -274,11 +284,6 @@ DriverResult JoyconDriver::SetPollingMode() {
LOG_ERROR(Input, "Error enabling IRS"); LOG_ERROR(Input, "Error enabling IRS");
} }
if (nfc_protocol->IsEnabled()) {
amiibo_detected = false;
nfc_protocol->DisableNfc();
}
if (nfc_enabled && supported_features.nfc) { if (nfc_enabled && supported_features.nfc) {
auto result = nfc_protocol->EnableNfc(); auto result = nfc_protocol->EnableNfc();
if (result == DriverResult::Success) { if (result == DriverResult::Success) {
@ -292,11 +297,6 @@ DriverResult JoyconDriver::SetPollingMode() {
LOG_ERROR(Input, "Error enabling NFC"); LOG_ERROR(Input, "Error enabling NFC");
} }
if (ring_protocol->IsEnabled()) {
ring_connected = false;
ring_protocol->DisableRingCon();
}
if (hidbus_enabled && supported_features.hidbus) { if (hidbus_enabled && supported_features.hidbus) {
auto result = ring_protocol->EnableRingCon(); auto result = ring_protocol->EnableRingCon();
if (result == DriverResult::Success) { if (result == DriverResult::Success) {
@ -428,6 +428,12 @@ DriverResult JoyconDriver::SetPasiveMode() {
} }
DriverResult JoyconDriver::SetActiveMode() { DriverResult JoyconDriver::SetActiveMode() {
if (is_ring_disabled_by_irs) {
is_ring_disabled_by_irs = false;
SetActiveMode();
return SetRingConMode();
}
std::scoped_lock lock{mutex}; std::scoped_lock lock{mutex};
motion_enabled = true; motion_enabled = true;
hidbus_enabled = false; hidbus_enabled = false;
@ -444,6 +450,10 @@ DriverResult JoyconDriver::SetIrMode() {
return DriverResult::NotSupported; return DriverResult::NotSupported;
} }
if (ring_connected) {
is_ring_disabled_by_irs = true;
}
motion_enabled = false; motion_enabled = false;
hidbus_enabled = false; hidbus_enabled = false;
nfc_enabled = false; nfc_enabled = false;

View file

@ -108,6 +108,7 @@ private:
bool starlink_connected{}; bool starlink_connected{};
bool ring_connected{}; bool ring_connected{};
bool amiibo_detected{}; bool amiibo_detected{};
bool is_ring_disabled_by_irs{};
// Harware configuration // Harware configuration
u8 leds{}; u8 leds{};

View file

@ -74,8 +74,8 @@ DriverResult JoyconCommonProtocol::SendData(std::span<const u8> buffer) {
} }
DriverResult JoyconCommonProtocol::GetSubCommandResponse(SubCommand sc, std::vector<u8>& output) { DriverResult JoyconCommonProtocol::GetSubCommandResponse(SubCommand sc, std::vector<u8>& output) {
constexpr int timeout_mili = 100; constexpr int timeout_mili = 66;
constexpr int MaxTries = 10; constexpr int MaxTries = 15;
int tries = 0; int tries = 0;
output.resize(MaxSubCommandResponseSize); output.resize(MaxSubCommandResponseSize);

View file

@ -57,11 +57,16 @@ public:
return start_address; return start_address;
} }
[[nodiscard]] bool IsPropietaryDriver() const noexcept {
return is_propietary_driver;
}
protected: protected:
ProgramHeader sph{}; ProgramHeader sph{};
std::array<u32, 8> gp_passthrough_mask{}; std::array<u32, 8> gp_passthrough_mask{};
Stage stage{}; Stage stage{};
u32 start_address{}; u32 start_address{};
bool is_propietary_driver{};
}; };
} // namespace Shader } // namespace Shader

View file

@ -677,6 +677,30 @@ void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
} }
} }
void FoldDriverConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst, u32 which_bank,
u32 offset_start = 0, u32 offset_end = std::numeric_limits<u16>::max()) {
const IR::Value bank{inst.Arg(0)};
const IR::Value offset{inst.Arg(1)};
if (!bank.IsImmediate() || !offset.IsImmediate()) {
return;
}
const auto bank_value = bank.U32();
if (bank_value != which_bank) {
return;
}
const auto offset_value = offset.U32();
if (offset_value < offset_start || offset_value >= offset_end) {
return;
}
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
if (inst.GetOpcode() == IR::Opcode::GetCbufU32) {
inst.ReplaceUsesWith(IR::Value{env.ReadCbufValue(bank_value, offset_value)});
} else {
inst.ReplaceUsesWith(
IR::Value{Common::BitCast<f32>(env.ReadCbufValue(bank_value, offset_value))});
}
}
void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::GetRegister: case IR::Opcode::GetRegister:
@ -825,13 +849,17 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
case IR::Opcode::GetCbufF32: case IR::Opcode::GetCbufF32:
case IR::Opcode::GetCbufU32: case IR::Opcode::GetCbufU32:
if (env.HasHLEMacroState()) { if (env.HasHLEMacroState()) {
return FoldConstBuffer(env, block, inst); FoldConstBuffer(env, block, inst);
}
if (env.IsPropietaryDriver()) {
FoldDriverConstBuffer(env, block, inst, 1);
} }
break; break;
default: default:
break; break;
} }
} }
} // Anonymous namespace } // Anonymous namespace
void ConstantPropagationPass(Environment& env, IR::Program& program) { void ConstantPropagationPass(Environment& env, IR::Program& program) {

View file

@ -94,7 +94,7 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind
void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) { void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) {
draw_state.topology = topology; draw_state.topology = topology;
ProcessDrawIndirect(true); ProcessDrawIndirect();
} }
void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first,
@ -105,7 +105,7 @@ void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_firs
draw_state.index_buffer.first = index_first; draw_state.index_buffer.first = index_first;
draw_state.index_buffer.count = index_count; draw_state.index_buffer.count = index_count;
ProcessDrawIndirect(true); ProcessDrawIndirect();
} }
void DrawManager::SetInlineIndexBuffer(u32 index) { void DrawManager::SetInlineIndexBuffer(u32 index) {
@ -216,9 +216,12 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) {
} }
} }
void DrawManager::ProcessDrawIndirect(bool draw_indexed) { void DrawManager::ProcessDrawIndirect() {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology, LOG_TRACE(
draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count); HW_GPU,
"called, topology={}, is_indexed={}, includes_count={}, buffer_size={}, max_draw_count={}",
draw_state.topology, indirect_state.is_indexed, indirect_state.include_count,
indirect_state.buffer_size, indirect_state.max_draw_counts);
UpdateTopology(); UpdateTopology();

View file

@ -85,7 +85,7 @@ private:
void ProcessDraw(bool draw_indexed, u32 instance_count); void ProcessDraw(bool draw_indexed, u32 instance_count);
void ProcessDrawIndirect(bool draw_indexed); void ProcessDrawIndirect();
Maxwell3D* maxwell3d{}; Maxwell3D* maxwell3d{};
State draw_state{}; State draw_state{};

View file

@ -685,7 +685,8 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
return regs.reg_array[method]; return regs.reg_array[method];
} }
void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) { void Maxwell3D::SetHLEReplacementAttributeType(u32 bank, u32 offset,
HLEReplacementAttributeType name) {
const u64 key = (static_cast<u64>(bank) << 32) | offset; const u64 key = (static_cast<u64>(bank) << 32) | offset;
replace_table.emplace(key, name); replace_table.emplace(key, name);
} }

View file

@ -2711,7 +2711,7 @@ public:
u32 post_z_pixel_imask; ///< 0x0F1C u32 post_z_pixel_imask; ///< 0x0F1C
INSERT_PADDING_BYTES_NOINIT(0x20); INSERT_PADDING_BYTES_NOINIT(0x20);
ConstantColorRendering const_color_rendering; ///< 0x0F40 ConstantColorRendering const_color_rendering; ///< 0x0F40
s32 stencil_back_ref; ///< 0x0F54 u32 stencil_back_ref; ///< 0x0F54
u32 stencil_back_mask; ///< 0x0F58 u32 stencil_back_mask; ///< 0x0F58
u32 stencil_back_func_mask; ///< 0x0F5C u32 stencil_back_func_mask; ///< 0x0F5C
INSERT_PADDING_BYTES_NOINIT(0x14); INSERT_PADDING_BYTES_NOINIT(0x14);
@ -2835,9 +2835,9 @@ public:
Blend blend; ///< 0x133C Blend blend; ///< 0x133C
u32 stencil_enable; ///< 0x1380 u32 stencil_enable; ///< 0x1380
StencilOp stencil_front_op; ///< 0x1384 StencilOp stencil_front_op; ///< 0x1384
s32 stencil_front_ref; ///< 0x1394 u32 stencil_front_ref; ///< 0x1394
s32 stencil_front_func_mask; ///< 0x1398 u32 stencil_front_func_mask; ///< 0x1398
s32 stencil_front_mask; ///< 0x139C u32 stencil_front_mask; ///< 0x139C
INSERT_PADDING_BYTES_NOINIT(0x4); INSERT_PADDING_BYTES_NOINIT(0x4);
u32 draw_auto_start_byte_count; ///< 0x13A4 u32 draw_auto_start_byte_count; ///< 0x13A4
PsSaturate frag_color_clamp; ///< 0x13A8 PsSaturate frag_color_clamp; ///< 0x13A8
@ -3031,14 +3031,14 @@ public:
EngineHint engine_state{EngineHint::None}; EngineHint engine_state{EngineHint::None};
enum class HLEReplaceName : u32 { enum class HLEReplacementAttributeType : u32 {
BaseVertex = 0x0, BaseVertex = 0x0,
BaseInstance = 0x1, BaseInstance = 0x1,
}; };
void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name); void SetHLEReplacementAttributeType(u32 bank, u32 offset, HLEReplacementAttributeType name);
std::unordered_map<u64, HLEReplaceName> replace_table; std::unordered_map<u64, HLEReplacementAttributeType> replace_table;
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
@ -3089,7 +3089,7 @@ public:
std::vector<u8> inline_index_draw_indexes; std::vector<u8> inline_index_draw_indexes;
GPUVAddr getMacroAddress(size_t index) const { GPUVAddr GetMacroAddress(size_t index) const {
return macro_addresses[index]; return macro_addresses[index];
} }
@ -3100,7 +3100,7 @@ public:
RefreshParametersImpl(); RefreshParametersImpl();
} }
bool AnyParametersDirty() { bool AnyParametersDirty() const {
return current_macro_dirty; return current_macro_dirty;
} }
@ -3196,11 +3196,6 @@ private:
bool execute_on{true}; bool execute_on{true};
std::array<bool, Regs::NUM_REGS> draw_command{};
std::vector<u32> deferred_draw_method;
enum class DrawMode : u32 { General = 0, Instance, InlineIndex };
DrawMode draw_mode{DrawMode::General};
bool draw_indexed{};
std::vector<std::pair<GPUVAddr, size_t>> macro_segments; std::vector<std::pair<GPUVAddr, size_t>> macro_segments;
std::vector<GPUVAddr> macro_addresses; std::vector<GPUVAddr> macro_addresses;
bool current_macro_dirty{}; bool current_macro_dirty{};

View file

@ -81,14 +81,15 @@ public:
params.is_indexed = false; params.is_indexed = false;
params.include_count = false; params.include_count = false;
params.count_start_address = 0; params.count_start_address = 0;
params.indirect_start_address = maxwell3d.getMacroAddress(1); params.indirect_start_address = maxwell3d.GetMacroAddress(1);
params.buffer_size = 4 * sizeof(u32); params.buffer_size = 4 * sizeof(u32);
params.max_draw_counts = 1; params.max_draw_counts = 1;
params.stride = 0; params.stride = 0;
if (extended) { if (extended) {
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); maxwell3d.SetHLEReplacementAttributeType(
0, 0x640, Maxwell::HLEReplacementAttributeType::BaseInstance);
} }
maxwell3d.draw_manager->DrawArrayIndirect(topology); maxwell3d.draw_manager->DrawArrayIndirect(topology);
@ -125,7 +126,8 @@ private:
if (extended) { if (extended) {
maxwell3d.regs.global_base_instance_index = base_instance; maxwell3d.regs.global_base_instance_index = base_instance;
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); maxwell3d.SetHLEReplacementAttributeType(
0, 0x640, Maxwell::HLEReplacementAttributeType::BaseInstance);
} }
maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance, maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance,
@ -160,13 +162,15 @@ public:
maxwell3d.regs.global_base_instance_index = base_instance; maxwell3d.regs.global_base_instance_index = base_instance;
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); maxwell3d.SetHLEReplacementAttributeType(0, 0x640,
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); Maxwell::HLEReplacementAttributeType::BaseVertex);
maxwell3d.SetHLEReplacementAttributeType(
0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance);
auto& params = maxwell3d.draw_manager->GetIndirectParams(); auto& params = maxwell3d.draw_manager->GetIndirectParams();
params.is_indexed = true; params.is_indexed = true;
params.include_count = false; params.include_count = false;
params.count_start_address = 0; params.count_start_address = 0;
params.indirect_start_address = maxwell3d.getMacroAddress(1); params.indirect_start_address = maxwell3d.GetMacroAddress(1);
params.buffer_size = 5 * sizeof(u32); params.buffer_size = 5 * sizeof(u32);
params.max_draw_counts = 1; params.max_draw_counts = 1;
params.stride = 0; params.stride = 0;
@ -190,8 +194,10 @@ private:
maxwell3d.regs.global_base_instance_index = base_instance; maxwell3d.regs.global_base_instance_index = base_instance;
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); maxwell3d.SetHLEReplacementAttributeType(0, 0x640,
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); Maxwell::HLEReplacementAttributeType::BaseVertex);
maxwell3d.SetHLEReplacementAttributeType(
0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance);
maxwell3d.draw_manager->DrawIndex( maxwell3d.draw_manager->DrawIndex(
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
@ -253,15 +259,17 @@ public:
auto& params = maxwell3d.draw_manager->GetIndirectParams(); auto& params = maxwell3d.draw_manager->GetIndirectParams();
params.is_indexed = true; params.is_indexed = true;
params.include_count = true; params.include_count = true;
params.count_start_address = maxwell3d.getMacroAddress(4); params.count_start_address = maxwell3d.GetMacroAddress(4);
params.indirect_start_address = maxwell3d.getMacroAddress(5); params.indirect_start_address = maxwell3d.GetMacroAddress(5);
params.buffer_size = stride * draw_count; params.buffer_size = stride * draw_count;
params.max_draw_counts = draw_count; params.max_draw_counts = draw_count;
params.stride = stride; params.stride = stride;
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); maxwell3d.SetHLEReplacementAttributeType(0, 0x640,
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); Maxwell::HLEReplacementAttributeType::BaseVertex);
maxwell3d.SetHLEReplacementAttributeType(
0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance);
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate);
maxwell3d.engine_state = Maxwell::EngineHint::None; maxwell3d.engine_state = Maxwell::EngineHint::None;
maxwell3d.replace_table.clear(); maxwell3d.replace_table.clear();
@ -298,8 +306,10 @@ private:
const u32 base_instance = parameters[base + 4]; const u32 base_instance = parameters[base + 4];
maxwell3d.regs.vertex_id_base = base_vertex; maxwell3d.regs.vertex_id_base = base_vertex;
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); maxwell3d.SetHLEReplacementAttributeType(
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); 0, 0x640, Maxwell::HLEReplacementAttributeType::BaseVertex);
maxwell3d.SetHLEReplacementAttributeType(
0, 0x644, Maxwell::HLEReplacementAttributeType::BaseInstance);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base],
base_vertex, base_instance, parameters[base + 1]); base_vertex, base_instance, parameters[base + 1]);

View file

@ -51,7 +51,7 @@ using VideoCommon::LoadPipelines;
using VideoCommon::SerializePipeline; using VideoCommon::SerializePipeline;
using Context = ShaderContext::Context; using Context = ShaderContext::Context;
constexpr u32 CACHE_VERSION = 8; constexpr u32 CACHE_VERSION = 9;
template <typename Container> template <typename Container>
auto MakeSpan(Container& container) { auto MakeSpan(Container& container) {

View file

@ -354,6 +354,7 @@ struct TextureCacheParams {
static constexpr bool FRAMEBUFFER_BLITS = true; static constexpr bool FRAMEBUFFER_BLITS = true;
static constexpr bool HAS_EMULATED_COPIES = true; static constexpr bool HAS_EMULATED_COPIES = true;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true; static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
using Runtime = OpenGL::TextureCacheRuntime; using Runtime = OpenGL::TextureCacheRuntime;
using Image = OpenGL::Image; using Image = OpenGL::Image;
@ -361,6 +362,7 @@ struct TextureCacheParams {
using ImageView = OpenGL::ImageView; using ImageView = OpenGL::ImageView;
using Sampler = OpenGL::Sampler; using Sampler = OpenGL::Sampler;
using Framebuffer = OpenGL::Framebuffer; using Framebuffer = OpenGL::Framebuffer;
using AsyncBuffer = u32;
}; };
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;

View file

@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment;
using VideoCommon::GenericEnvironment; using VideoCommon::GenericEnvironment;
using VideoCommon::GraphicsEnvironment; using VideoCommon::GraphicsEnvironment;
constexpr u32 CACHE_VERSION = 9; constexpr u32 CACHE_VERSION = 10;
template <typename Container> template <typename Container>
auto MakeSpan(Container& container) { auto MakeSpan(Container& container) {

View file

@ -886,30 +886,52 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
if (!state_tracker.TouchStencilProperties()) { if (!state_tracker.TouchStencilProperties()) {
return; return;
} }
if (regs.stencil_two_side_enable) { bool update_references = state_tracker.TouchStencilReference();
// Separate values per face bool update_write_mask = state_tracker.TouchStencilWriteMask();
scheduler.Record( bool update_compare_masks = state_tracker.TouchStencilCompare();
[front_ref = regs.stencil_front_ref, front_write_mask = regs.stencil_front_mask, if (state_tracker.TouchStencilSide(regs.stencil_two_side_enable != 0)) {
front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_ref, update_references = true;
back_write_mask = regs.stencil_back_mask, update_write_mask = true;
back_test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) { update_compare_masks = true;
// Front face }
cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_BIT, front_ref); if (update_references) {
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_BIT, front_write_mask); scheduler.Record([front_ref = regs.stencil_front_ref, back_ref = regs.stencil_back_ref,
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_BIT, front_test_mask); two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_ref != back_ref;
// Back face // Front face
cmdbuf.SetStencilReference(
set_back ? VK_STENCIL_FACE_FRONT_BIT : VK_STENCIL_FACE_FRONT_AND_BACK, front_ref);
if (set_back) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref); cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
}
});
}
if (update_write_mask) {
scheduler.Record([front_write_mask = regs.stencil_front_mask,
back_write_mask = regs.stencil_back_mask,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_write_mask != back_write_mask;
// Front face
cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_write_mask);
if (set_back) {
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask); cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
}
});
}
if (update_compare_masks) {
scheduler.Record([front_test_mask = regs.stencil_front_func_mask,
back_test_mask = regs.stencil_back_func_mask,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_test_mask != back_test_mask;
// Front face
cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_test_mask);
if (set_back) {
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask); cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
}); }
} else {
// Front face defines both faces
scheduler.Record([ref = regs.stencil_front_ref, write_mask = regs.stencil_front_mask,
test_mask = regs.stencil_front_func_mask](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, ref);
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, write_mask);
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, test_mask);
}); });
} }
} }
@ -990,7 +1012,7 @@ void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& re
constexpr size_t POINT = 0; constexpr size_t POINT = 0;
constexpr size_t LINE = 1; constexpr size_t LINE = 1;
constexpr size_t POLYGON = 2; constexpr size_t POLYGON = 2;
constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { static constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
POINT, // Points POINT, // Points
LINE, // Lines LINE, // Lines
LINE, // LineLoop LINE, // LineLoop
@ -1099,13 +1121,12 @@ void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
} }
void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) { void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!regs.logic_op.enable) {
return;
}
if (!state_tracker.TouchLogicOp()) { if (!state_tracker.TouchLogicOp()) {
return; return;
} }
auto op = static_cast<VkLogicOp>(static_cast<u32>(regs.logic_op.op) - 0x1500); const auto op_value = static_cast<u32>(regs.logic_op.op);
auto op = op_value >= 0x1500 && op_value < 0x1510 ? static_cast<VkLogicOp>(op_value - 0x1500)
: VK_LOGIC_OP_NO_OP;
scheduler.Record([op](vk::CommandBuffer cmdbuf) { cmdbuf.SetLogicOpEXT(op); }); scheduler.Record([op](vk::CommandBuffer cmdbuf) { cmdbuf.SetLogicOpEXT(op); });
} }

View file

@ -30,7 +30,8 @@ constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
constexpr VkMemoryPropertyFlags HOST_FLAGS = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; constexpr VkMemoryPropertyFlags HOST_FLAGS =
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
bool IsStreamHeap(VkMemoryHeap heap) noexcept { bool IsStreamHeap(VkMemoryHeap heap) noexcept {
@ -92,7 +93,9 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.pNext = nullptr, .pNext = nullptr,
.flags = 0, .flags = 0,
.size = STREAM_BUFFER_SIZE, .size = STREAM_BUFFER_SIZE,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE, .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0, .queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr, .pQueueFamilyIndices = nullptr,
@ -244,19 +247,15 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s
StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage, StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage,
bool deferred) { bool deferred) {
const u32 log2 = Common::Log2Ceil64(size); const u32 log2 = Common::Log2Ceil64(size);
VkBufferUsageFlags usage_flags{};
if (usage == MemoryUsage::Upload) {
usage_flags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
}
if (usage == MemoryUsage::Download) {
usage_flags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
}
vk::Buffer buffer = device.GetLogical().CreateBuffer({ vk::Buffer buffer = device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
.flags = 0, .flags = 0,
.size = 1ULL << log2, .size = 1ULL << log2,
.usage = usage_flags, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE, .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0, .queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr, .pQueueFamilyIndices = nullptr,

View file

@ -33,6 +33,9 @@ Flags MakeInvalidationFlags() {
BlendConstants, BlendConstants,
DepthBounds, DepthBounds,
StencilProperties, StencilProperties,
StencilReference,
StencilWriteMask,
StencilCompare,
LineWidth, LineWidth,
CullMode, CullMode,
DepthBoundsEnable, DepthBoundsEnable,
@ -99,14 +102,17 @@ void SetupDirtyDepthBounds(Tables& tables) {
} }
void SetupDirtyStencilProperties(Tables& tables) { void SetupDirtyStencilProperties(Tables& tables) {
auto& table = tables[0]; const auto setup = [&](size_t position, u8 flag) {
table[OFF(stencil_two_side_enable)] = StencilProperties; tables[0][position] = flag;
table[OFF(stencil_front_ref)] = StencilProperties; tables[1][position] = StencilProperties;
table[OFF(stencil_front_mask)] = StencilProperties; };
table[OFF(stencil_front_func_mask)] = StencilProperties; tables[0][OFF(stencil_two_side_enable)] = StencilProperties;
table[OFF(stencil_back_ref)] = StencilProperties; setup(OFF(stencil_front_ref), StencilReference);
table[OFF(stencil_back_mask)] = StencilProperties; setup(OFF(stencil_front_mask), StencilWriteMask);
table[OFF(stencil_back_func_mask)] = StencilProperties; setup(OFF(stencil_front_func_mask), StencilCompare);
setup(OFF(stencil_back_ref), StencilReference);
setup(OFF(stencil_back_mask), StencilWriteMask);
setup(OFF(stencil_back_func_mask), StencilCompare);
} }
void SetupDirtyLineWidth(Tables& tables) { void SetupDirtyLineWidth(Tables& tables) {

View file

@ -35,6 +35,9 @@ enum : u8 {
BlendConstants, BlendConstants,
DepthBounds, DepthBounds,
StencilProperties, StencilProperties,
StencilReference,
StencilWriteMask,
StencilCompare,
LineWidth, LineWidth,
CullMode, CullMode,
@ -113,6 +116,24 @@ public:
return Exchange(Dirty::StencilProperties, false); return Exchange(Dirty::StencilProperties, false);
} }
bool TouchStencilReference() {
return Exchange(Dirty::StencilReference, false);
}
bool TouchStencilWriteMask() {
return Exchange(Dirty::StencilWriteMask, false);
}
bool TouchStencilCompare() {
return Exchange(Dirty::StencilCompare, false);
}
bool TouchStencilSide(bool two_sided_stencil_new) {
bool result = two_sided_stencil != two_sided_stencil_new;
two_sided_stencil = two_sided_stencil_new;
return result;
}
bool TouchLineWidth() const { bool TouchLineWidth() const {
return Exchange(Dirty::LineWidth, false); return Exchange(Dirty::LineWidth, false);
} }
@ -218,6 +239,7 @@ private:
Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags; Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags; Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY; Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
bool two_sided_stencil = false;
}; };
} // namespace Vulkan } // namespace Vulkan

View file

@ -812,8 +812,12 @@ StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_buffer_pool.Request(size, MemoryUsage::Upload); return staging_buffer_pool.Request(size, MemoryUsage::Upload);
} }
StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
return staging_buffer_pool.Request(size, MemoryUsage::Download); return staging_buffer_pool.Request(size, MemoryUsage::Download, deferred);
}
void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
staging_buffer_pool.FreeDeferred(ref);
} }
bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {

View file

@ -51,7 +51,9 @@ public:
StagingBufferRef UploadStagingBuffer(size_t size); StagingBufferRef UploadStagingBuffer(size_t size);
StagingBufferRef DownloadStagingBuffer(size_t size); StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
void FreeDeferredStagingBuffer(StagingBufferRef& ref);
void TickFrame(); void TickFrame();
@ -347,6 +349,7 @@ struct TextureCacheParams {
static constexpr bool FRAMEBUFFER_BLITS = false; static constexpr bool FRAMEBUFFER_BLITS = false;
static constexpr bool HAS_EMULATED_COPIES = false; static constexpr bool HAS_EMULATED_COPIES = false;
static constexpr bool HAS_DEVICE_MEMORY_INFO = true; static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
using Runtime = Vulkan::TextureCacheRuntime; using Runtime = Vulkan::TextureCacheRuntime;
using Image = Vulkan::Image; using Image = Vulkan::Image;
@ -354,6 +357,7 @@ struct TextureCacheParams {
using ImageView = Vulkan::ImageView; using ImageView = Vulkan::ImageView;
using Sampler = Vulkan::Sampler; using Sampler = Vulkan::Sampler;
using Framebuffer = Vulkan::Framebuffer; using Framebuffer = Vulkan::Framebuffer;
using AsyncBuffer = Vulkan::StagingBufferRef;
}; };
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;

View file

@ -325,6 +325,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
ASSERT(local_size <= std::numeric_limits<u32>::max()); ASSERT(local_size <= std::numeric_limits<u32>::max());
local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size; local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size;
texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot; texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot;
is_propietary_driver = texture_bound == 2;
has_hle_engine_state = has_hle_engine_state =
maxwell3d->engine_state == Tegra::Engines::Maxwell3D::EngineHint::OnHLEMacro; maxwell3d->engine_state == Tegra::Engines::Maxwell3D::EngineHint::OnHLEMacro;
} }
@ -350,11 +351,11 @@ std::optional<Shader::ReplaceConstant> GraphicsEnvironment::GetReplaceConstBuffe
if (it == maxwell3d->replace_table.end()) { if (it == maxwell3d->replace_table.end()) {
return std::nullopt; return std::nullopt;
} }
const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplaceName name) { const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplacementAttributeType name) {
switch (name) { switch (name) {
case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseVertex: case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::BaseVertex:
return Shader::ReplaceConstant::BaseVertex; return Shader::ReplaceConstant::BaseVertex;
case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseInstance: case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::BaseInstance:
return Shader::ReplaceConstant::BaseInstance; return Shader::ReplaceConstant::BaseInstance;
default: default:
UNREACHABLE(); UNREACHABLE();
@ -399,6 +400,7 @@ ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_com
stage = Shader::Stage::Compute; stage = Shader::Stage::Compute;
local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc; local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc;
texture_bound = kepler_compute->regs.tex_cb_index; texture_bound = kepler_compute->regs.tex_cb_index;
is_propietary_driver = texture_bound == 2;
shared_memory_size = qmd.shared_alloc; shared_memory_size = qmd.shared_alloc;
workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
} }
@ -498,6 +500,7 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask));
} }
} }
is_propietary_driver = texture_bound == 2;
} }
void FileEnvironment::Dump(u64 hash) { void FileEnvironment::Dump(u64 hash) {

View file

@ -646,7 +646,28 @@ bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
template <class P> template <class P>
void TextureCache<P>::CommitAsyncFlushes() { void TextureCache<P>::CommitAsyncFlushes() {
// This is intentionally passing the value by copy // This is intentionally passing the value by copy
committed_downloads.push(uncommitted_downloads); if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
const std::span<const ImageId> download_ids = uncommitted_downloads;
if (download_ids.empty()) {
committed_downloads.emplace_back(std::move(uncommitted_downloads));
uncommitted_downloads.clear();
async_buffers.emplace_back(std::optional<AsyncBuffer>{});
return;
}
size_t total_size_bytes = 0;
for (const ImageId image_id : download_ids) {
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
}
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
for (const ImageId image_id : download_ids) {
Image& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
}
async_buffers.emplace_back(download_map);
}
committed_downloads.emplace_back(std::move(uncommitted_downloads));
uncommitted_downloads.clear(); uncommitted_downloads.clear();
} }
@ -655,37 +676,58 @@ void TextureCache<P>::PopAsyncFlushes() {
if (committed_downloads.empty()) { if (committed_downloads.empty()) {
return; return;
} }
const std::span<const ImageId> download_ids = committed_downloads.front(); if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
if (download_ids.empty()) { const std::span<const ImageId> download_ids = committed_downloads.front();
committed_downloads.pop(); if (download_ids.empty()) {
return; committed_downloads.pop_front();
async_buffers.pop_front();
return;
}
auto download_map = *async_buffers.front();
std::span<u8> download_span = download_map.mapped_span;
for (size_t i = download_ids.size(); i > 0; i--) {
const ImageBase& image = slot_images[download_ids[i - 1]];
const auto copies = FullDownloadCopies(image.info);
download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
std::span<u8> download_span_alt = download_span.subspan(download_map.offset);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt,
swizzle_data_buffer);
}
runtime.FreeDeferredStagingBuffer(download_map);
committed_downloads.pop_front();
async_buffers.pop_front();
} else {
const std::span<const ImageId> download_ids = committed_downloads.front();
if (download_ids.empty()) {
committed_downloads.pop_front();
return;
}
size_t total_size_bytes = 0;
for (const ImageId image_id : download_ids) {
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
}
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
const size_t original_offset = download_map.offset;
for (const ImageId image_id : download_ids) {
Image& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += image.unswizzled_size_bytes;
}
// Wait for downloads to finish
runtime.Finish();
download_map.offset = original_offset;
std::span<u8> download_span = download_map.mapped_span;
for (const ImageId image_id : download_ids) {
const ImageBase& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
swizzle_data_buffer);
download_map.offset += image.unswizzled_size_bytes;
download_span = download_span.subspan(image.unswizzled_size_bytes);
}
committed_downloads.pop_front();
} }
size_t total_size_bytes = 0;
for (const ImageId image_id : download_ids) {
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
}
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
const size_t original_offset = download_map.offset;
for (const ImageId image_id : download_ids) {
Image& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += image.unswizzled_size_bytes;
}
// Wait for downloads to finish
runtime.Finish();
download_map.offset = original_offset;
std::span<u8> download_span = download_map.mapped_span;
for (const ImageId image_id : download_ids) {
const ImageBase& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
swizzle_data_buffer);
download_map.offset += image.unswizzled_size_bytes;
download_span = download_span.subspan(image.unswizzled_size_bytes);
}
committed_downloads.pop();
} }
template <class P> template <class P>
@ -1475,6 +1517,27 @@ void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
} }
} }
template <class P>
void TextureCache<P>::BubbleUpImages(VAddr cpu_addr, size_t size) {
ForEachCPUPage(cpu_addr, size, [this](u64 page) {
const auto it = page_table.find(page);
if (it == page_table.end()) {
return;
}
std::vector<ImageMapId>& map_vector = it->second;
for (size_t i = 1; i < map_vector.size(); i++) {
ImageMapView& bottom_map = slot_map_views[map_vector[i - 1]];
ImageMapView& top_map = slot_map_views[map_vector[i]];
if (slot_images[bottom_map.image_id].modification_tick <
slot_images[top_map.image_id].modification_tick) {
std::swap(map_vector[i - 1], map_vector[i]);
} else {
return;
}
}
});
}
template <class P> template <class P>
ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
@ -1788,6 +1851,7 @@ template <class P>
void TextureCache<P>::MarkModification(ImageBase& image) noexcept { void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
image.flags |= ImageFlagBits::GpuModified; image.flags |= ImageFlagBits::GpuModified;
image.modification_tick = ++modification_tick; image.modification_tick = ++modification_tick;
BubbleUpImages(image.cpu_addr, image.guest_size_bytes);
} }
template <class P> template <class P>

View file

@ -92,6 +92,8 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
/// True when the API can provide info about the memory of the device. /// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
/// True when the API can do asynchronous texture downloads.
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()}; static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
@ -106,6 +108,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
using ImageView = typename P::ImageView; using ImageView = typename P::ImageView;
using Sampler = typename P::Sampler; using Sampler = typename P::Sampler;
using Framebuffer = typename P::Framebuffer; using Framebuffer = typename P::Framebuffer;
using AsyncBuffer = typename P::AsyncBuffer;
struct BlitImages { struct BlitImages {
ImageId dst_id; ImageId dst_id;
@ -316,6 +319,8 @@ private:
template <typename Func> template <typename Func>
void ForEachSparseSegment(ImageBase& image, Func&& func); void ForEachSparseSegment(ImageBase& image, Func&& func);
void BubbleUpImages(VAddr cpu_addr, size_t size);
/// Find or create an image view in the given image with the passed parameters /// Find or create an image view in the given image with the passed parameters
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
@ -403,7 +408,8 @@ private:
// TODO: This data structure is not optimal and it should be reworked // TODO: This data structure is not optimal and it should be reworked
std::vector<ImageId> uncommitted_downloads; std::vector<ImageId> uncommitted_downloads;
std::queue<std::vector<ImageId>> committed_downloads; std::deque<std::vector<ImageId>> committed_downloads;
std::deque<std::optional<AsyncBuffer>> async_buffers;
struct LRUItemParams { struct LRUItemParams {
using ObjectType = ImageId; using ObjectType = ImageId;

View file

@ -756,20 +756,23 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
CollectToolingInfo(); CollectToolingInfo();
if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
switch (arch) {
case NvidiaArchitecture::AmpereOrNewer:
LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math");
is_float16_supported = false;
break;
case NvidiaArchitecture::Turing:
break;
case NvidiaArchitecture::VoltaOrOlder:
LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor");
khr_push_descriptor = false;
break;
}
const u32 nv_major_version = (properties.driverVersion >> 22) & 0x3ff; const u32 nv_major_version = (properties.driverVersion >> 22) & 0x3ff;
if (nv_major_version < 527) {
const auto arch = GetNvidiaArchitecture(physical, supported_extensions);
switch (arch) {
case NvidiaArchitecture::AmpereOrNewer:
LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math");
is_float16_supported = false;
break;
case NvidiaArchitecture::Turing:
break;
case NvidiaArchitecture::VoltaOrOlder:
LOG_WARNING(Render_Vulkan,
"Blacklisting Volta and older from VK_KHR_push_descriptor");
khr_push_descriptor = false;
break;
}
}
if (nv_major_version >= 510) { if (nv_major_version >= 510) {
LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits"); LOG_WARNING(Render_Vulkan, "NVIDIA Drivers >= 510 do not support MSAA image blits");
cant_blit_msaa = true; cant_blit_msaa = true;
@ -834,8 +837,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS; const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
if (ext_vertex_input_dynamic_state && is_intel_windows) { if (ext_vertex_input_dynamic_state && is_intel_windows) {
LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); const u32 version = (properties.driverVersion << 3) >> 3;
ext_vertex_input_dynamic_state = false; if (version < VK_MAKE_API_VERSION(27, 20, 100, 0)) {
LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
ext_vertex_input_dynamic_state = false;
}
} }
if (is_float16_supported && is_intel_windows) { if (is_float16_supported && is_intel_windows) {
// Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being.