2022-11-05 13:58:44 +01:00
|
|
|
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <filesystem>
|
|
|
|
#include <fstream>
|
|
|
|
#include <memory>
|
|
|
|
#include <optional>
|
|
|
|
#include <utility>
|
|
|
|
|
|
|
|
#include "common/assert.h"
|
|
|
|
#include "common/cityhash.h"
|
|
|
|
#include "common/common_types.h"
|
|
|
|
#include "common/div_ceil.h"
|
|
|
|
#include "common/fs/fs.h"
|
|
|
|
#include "common/fs/path_util.h"
|
|
|
|
#include "common/logging/log.h"
|
2022-11-22 21:10:41 +01:00
|
|
|
#include "common/polyfill_ranges.h"
|
2022-11-05 13:58:44 +01:00
|
|
|
#include "shader_recompiler/environment.h"
|
|
|
|
#include "video_core/engines/kepler_compute.h"
|
|
|
|
#include "video_core/memory_manager.h"
|
|
|
|
#include "video_core/shader_environment.h"
|
2022-11-05 19:21:57 +01:00
|
|
|
#include "video_core/texture_cache/format_lookup_table.h"
|
2022-11-05 13:58:44 +01:00
|
|
|
#include "video_core/textures/texture.h"
|
|
|
|
|
|
|
|
namespace VideoCommon {
|
|
|
|
|
|
|
|
constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
|
|
|
|
|
|
|
|
constexpr size_t INST_SIZE = sizeof(u64);
|
|
|
|
|
|
|
|
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
|
|
|
|
|
|
|
static u64 MakeCbufKey(u32 index, u32 offset) {
|
|
|
|
return (static_cast<u64>(index) << 32) | offset;
|
|
|
|
}
|
|
|
|
|
2022-11-05 19:21:57 +01:00
|
|
|
static Shader::TextureType ConvertTextureType(const Tegra::Texture::TICEntry& entry) {
|
2022-11-05 13:58:44 +01:00
|
|
|
switch (entry.texture_type) {
|
|
|
|
case Tegra::Texture::TextureType::Texture1D:
|
|
|
|
return Shader::TextureType::Color1D;
|
|
|
|
case Tegra::Texture::TextureType::Texture2D:
|
|
|
|
case Tegra::Texture::TextureType::Texture2DNoMipmap:
|
|
|
|
return entry.normalized_coords ? Shader::TextureType::Color2D
|
|
|
|
: Shader::TextureType::Color2DRect;
|
|
|
|
case Tegra::Texture::TextureType::Texture3D:
|
|
|
|
return Shader::TextureType::Color3D;
|
|
|
|
case Tegra::Texture::TextureType::TextureCubemap:
|
|
|
|
return Shader::TextureType::ColorCube;
|
|
|
|
case Tegra::Texture::TextureType::Texture1DArray:
|
|
|
|
return Shader::TextureType::ColorArray1D;
|
|
|
|
case Tegra::Texture::TextureType::Texture2DArray:
|
|
|
|
return Shader::TextureType::ColorArray2D;
|
|
|
|
case Tegra::Texture::TextureType::Texture1DBuffer:
|
|
|
|
return Shader::TextureType::Buffer;
|
|
|
|
case Tegra::Texture::TextureType::TextureCubeArray:
|
|
|
|
return Shader::TextureType::ColorArrayCube;
|
|
|
|
default:
|
|
|
|
UNIMPLEMENTED();
|
|
|
|
return Shader::TextureType::Color2D;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-05 19:21:57 +01:00
|
|
|
static Shader::TexturePixelFormat ConvertTexturePixelFormat(const Tegra::Texture::TICEntry& entry) {
|
|
|
|
switch (PixelFormatFromTextureInfo(entry.format, entry.r_type, entry.g_type, entry.b_type,
|
|
|
|
entry.a_type, entry.srgb_conversion)) {
|
|
|
|
case VideoCore::Surface::PixelFormat::A8B8G8R8_SNORM:
|
|
|
|
return Shader::TexturePixelFormat::A8B8G8R8_SNORM;
|
|
|
|
case VideoCore::Surface::PixelFormat::R8_SNORM:
|
|
|
|
return Shader::TexturePixelFormat::R8_SNORM;
|
|
|
|
case VideoCore::Surface::PixelFormat::R8G8_SNORM:
|
|
|
|
return Shader::TexturePixelFormat::R8G8_SNORM;
|
|
|
|
case VideoCore::Surface::PixelFormat::R16G16B16A16_SNORM:
|
|
|
|
return Shader::TexturePixelFormat::R16G16B16A16_SNORM;
|
|
|
|
case VideoCore::Surface::PixelFormat::R16G16_SNORM:
|
|
|
|
return Shader::TexturePixelFormat::R16G16_SNORM;
|
|
|
|
case VideoCore::Surface::PixelFormat::R16_SNORM:
|
|
|
|
return Shader::TexturePixelFormat::R16_SNORM;
|
|
|
|
default:
|
|
|
|
return Shader::TexturePixelFormat::OTHER;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-05 13:58:44 +01:00
|
|
|
static std::string_view StageToPrefix(Shader::Stage stage) {
|
|
|
|
switch (stage) {
|
|
|
|
case Shader::Stage::VertexB:
|
|
|
|
return "VB";
|
|
|
|
case Shader::Stage::TessellationControl:
|
|
|
|
return "TC";
|
|
|
|
case Shader::Stage::TessellationEval:
|
|
|
|
return "TE";
|
|
|
|
case Shader::Stage::Geometry:
|
|
|
|
return "GS";
|
|
|
|
case Shader::Stage::Fragment:
|
|
|
|
return "FS";
|
|
|
|
case Shader::Stage::Compute:
|
|
|
|
return "CS";
|
|
|
|
case Shader::Stage::VertexA:
|
|
|
|
return "VA";
|
|
|
|
default:
|
|
|
|
return "UK";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowest,
|
|
|
|
u32 initial_offset, Shader::Stage stage) {
|
|
|
|
const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)};
|
|
|
|
const auto base_dir{shader_dir / "shaders"};
|
|
|
|
if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
|
|
|
|
LOG_ERROR(Common_Filesystem, "Failed to create shader dump directories");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
const auto prefix = StageToPrefix(stage);
|
|
|
|
const auto name{base_dir / fmt::format("{}{:016x}.ash", prefix, hash)};
|
|
|
|
const size_t real_size = read_highest - read_lowest + initial_offset;
|
|
|
|
const size_t padding_needed = ((32 - (real_size % 32)) % 32);
|
|
|
|
std::fstream shader_file(name, std::ios::out | std::ios::binary);
|
|
|
|
const size_t jump_index = initial_offset / sizeof(u64);
|
|
|
|
shader_file.write(reinterpret_cast<const char*>(code + jump_index), real_size);
|
|
|
|
for (size_t i = 0; i < padding_needed; i++) {
|
|
|
|
shader_file.put(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
|
|
|
|
u32 start_address_)
|
|
|
|
: gpu_memory{&gpu_memory_}, program_base{program_base_} {
|
|
|
|
start_address = start_address_;
|
|
|
|
}
|
|
|
|
|
|
|
|
GenericEnvironment::~GenericEnvironment() = default;
|
|
|
|
|
|
|
|
u32 GenericEnvironment::TextureBoundBuffer() const {
|
|
|
|
return texture_bound;
|
|
|
|
}
|
|
|
|
|
|
|
|
u32 GenericEnvironment::LocalMemorySize() const {
|
|
|
|
return local_memory_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
u32 GenericEnvironment::SharedMemorySize() const {
|
|
|
|
return shared_memory_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::array<u32, 3> GenericEnvironment::WorkgroupSize() const {
|
|
|
|
return workgroup_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
u64 GenericEnvironment::ReadInstruction(u32 address) {
|
|
|
|
read_lowest = std::min(read_lowest, address);
|
|
|
|
read_highest = std::max(read_highest, address);
|
|
|
|
|
|
|
|
if (address >= cached_lowest && address < cached_highest) {
|
|
|
|
return code[(address - cached_lowest) / INST_SIZE];
|
|
|
|
}
|
|
|
|
has_unbound_instructions = true;
|
|
|
|
return gpu_memory->Read<u64>(program_base + address);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::optional<u64> GenericEnvironment::Analyze() {
|
|
|
|
const std::optional<u64> size{TryFindSize()};
|
|
|
|
if (!size) {
|
|
|
|
return std::nullopt;
|
|
|
|
}
|
|
|
|
cached_lowest = start_address;
|
|
|
|
cached_highest = start_address + static_cast<u32>(*size);
|
|
|
|
return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size);
|
|
|
|
}
|
|
|
|
|
|
|
|
void GenericEnvironment::SetCachedSize(size_t size_bytes) {
|
|
|
|
cached_lowest = start_address;
|
|
|
|
cached_highest = start_address + static_cast<u32>(size_bytes);
|
|
|
|
code.resize(CachedSize());
|
|
|
|
gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t GenericEnvironment::CachedSize() const noexcept {
|
|
|
|
return cached_highest - cached_lowest + INST_SIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t GenericEnvironment::ReadSize() const noexcept {
|
|
|
|
return read_highest - read_lowest + INST_SIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool GenericEnvironment::CanBeSerialized() const noexcept {
|
|
|
|
return !has_unbound_instructions;
|
|
|
|
}
|
|
|
|
|
|
|
|
u64 GenericEnvironment::CalculateHash() const {
|
|
|
|
const size_t size{ReadSize()};
|
|
|
|
const auto data{std::make_unique<char[]>(size)};
|
|
|
|
gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
|
|
|
|
return Common::CityHash64(data.get(), size);
|
|
|
|
}
|
|
|
|
|
|
|
|
void GenericEnvironment::Dump(u64 hash) {
|
|
|
|
DumpImpl(hash, code.data(), read_highest, read_lowest, initial_offset, stage);
|
|
|
|
}
|
|
|
|
|
|
|
|
void GenericEnvironment::Serialize(std::ofstream& file) const {
|
|
|
|
const u64 code_size{static_cast<u64>(CachedSize())};
|
|
|
|
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
|
2022-11-05 19:21:57 +01:00
|
|
|
const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())};
|
2022-11-05 13:58:44 +01:00
|
|
|
const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
|
2022-12-25 07:12:57 +01:00
|
|
|
const u64 num_cbuf_replacement_values{static_cast<u64>(cbuf_replacements.size())};
|
2022-11-05 13:58:44 +01:00
|
|
|
|
|
|
|
file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
|
|
|
|
.write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
|
2022-11-05 19:21:57 +01:00
|
|
|
.write(reinterpret_cast<const char*>(&num_texture_pixel_formats),
|
|
|
|
sizeof(num_texture_pixel_formats))
|
2022-11-05 13:58:44 +01:00
|
|
|
.write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
|
2022-12-25 07:12:57 +01:00
|
|
|
.write(reinterpret_cast<const char*>(&num_cbuf_replacement_values),
|
|
|
|
sizeof(num_cbuf_replacement_values))
|
2022-11-05 13:58:44 +01:00
|
|
|
.write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
|
|
|
|
.write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
|
|
|
|
.write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
|
|
|
|
.write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
|
|
|
|
.write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
|
2022-11-05 19:21:57 +01:00
|
|
|
.write(reinterpret_cast<const char*>(&viewport_transform_state),
|
|
|
|
sizeof(viewport_transform_state))
|
2022-11-05 13:58:44 +01:00
|
|
|
.write(reinterpret_cast<const char*>(&stage), sizeof(stage))
|
|
|
|
.write(reinterpret_cast<const char*>(code.data()), code_size);
|
|
|
|
for (const auto& [key, type] : texture_types) {
|
|
|
|
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
|
|
|
|
.write(reinterpret_cast<const char*>(&type), sizeof(type));
|
|
|
|
}
|
2022-11-05 19:21:57 +01:00
|
|
|
for (const auto& [key, format] : texture_pixel_formats) {
|
|
|
|
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
|
|
|
|
.write(reinterpret_cast<const char*>(&format), sizeof(format));
|
|
|
|
}
|
2022-11-05 13:58:44 +01:00
|
|
|
for (const auto& [key, type] : cbuf_values) {
|
|
|
|
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
|
|
|
|
.write(reinterpret_cast<const char*>(&type), sizeof(type));
|
|
|
|
}
|
2022-12-25 07:12:57 +01:00
|
|
|
for (const auto& [key, type] : cbuf_replacements) {
|
|
|
|
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
|
|
|
|
.write(reinterpret_cast<const char*>(&type), sizeof(type));
|
|
|
|
}
|
2022-11-05 13:58:44 +01:00
|
|
|
if (stage == Shader::Stage::Compute) {
|
|
|
|
file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
|
|
|
|
.write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size));
|
|
|
|
} else {
|
|
|
|
file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
|
|
|
|
if (stage == Shader::Stage::Geometry) {
|
|
|
|
file.write(reinterpret_cast<const char*>(&gp_passthrough_mask),
|
|
|
|
sizeof(gp_passthrough_mask));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::optional<u64> GenericEnvironment::TryFindSize() {
|
|
|
|
static constexpr size_t BLOCK_SIZE = 0x1000;
|
|
|
|
static constexpr size_t MAXIMUM_SIZE = 0x100000;
|
|
|
|
|
|
|
|
static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
|
|
|
|
static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
|
|
|
|
|
|
|
|
GPUVAddr guest_addr{program_base + start_address};
|
|
|
|
size_t offset{0};
|
|
|
|
size_t size{BLOCK_SIZE};
|
|
|
|
while (size <= MAXIMUM_SIZE) {
|
|
|
|
code.resize(size / INST_SIZE);
|
|
|
|
u64* const data = code.data() + offset / INST_SIZE;
|
|
|
|
gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
|
|
|
|
for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
|
|
|
|
const u64 inst = data[index / INST_SIZE];
|
|
|
|
if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
|
|
|
|
return offset + index;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
guest_addr += BLOCK_SIZE;
|
|
|
|
size += BLOCK_SIZE;
|
|
|
|
offset += BLOCK_SIZE;
|
|
|
|
}
|
|
|
|
return std::nullopt;
|
|
|
|
}
|
|
|
|
|
2022-11-05 19:21:57 +01:00
|
|
|
Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit,
|
|
|
|
bool via_header_index, u32 raw) {
|
2022-11-05 13:58:44 +01:00
|
|
|
const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
|
|
|
|
const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
|
|
|
|
Tegra::Texture::TICEntry entry;
|
|
|
|
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
|
2022-11-05 19:21:57 +01:00
|
|
|
return entry;
|
2022-11-05 13:58:44 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
|
|
|
|
Tegra::MemoryManager& gpu_memory_,
|
|
|
|
Maxwell::ShaderType program, GPUVAddr program_base_,
|
|
|
|
u32 start_address_)
|
|
|
|
: GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
|
|
|
|
gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
|
|
|
|
initial_offset = sizeof(sph);
|
|
|
|
gp_passthrough_mask = maxwell3d->regs.post_vtg_shader_attrib_skip_mask;
|
|
|
|
switch (program) {
|
|
|
|
case Maxwell::ShaderType::VertexA:
|
|
|
|
stage = Shader::Stage::VertexA;
|
|
|
|
stage_index = 0;
|
|
|
|
break;
|
|
|
|
case Maxwell::ShaderType::VertexB:
|
|
|
|
stage = Shader::Stage::VertexB;
|
|
|
|
stage_index = 0;
|
|
|
|
break;
|
|
|
|
case Maxwell::ShaderType::TessellationInit:
|
|
|
|
stage = Shader::Stage::TessellationControl;
|
|
|
|
stage_index = 1;
|
|
|
|
break;
|
|
|
|
case Maxwell::ShaderType::Tessellation:
|
|
|
|
stage = Shader::Stage::TessellationEval;
|
|
|
|
stage_index = 2;
|
|
|
|
break;
|
|
|
|
case Maxwell::ShaderType::Geometry:
|
|
|
|
stage = Shader::Stage::Geometry;
|
|
|
|
stage_index = 3;
|
|
|
|
break;
|
|
|
|
case Maxwell::ShaderType::Pixel:
|
|
|
|
stage = Shader::Stage::Fragment;
|
|
|
|
stage_index = 4;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ASSERT_MSG(false, "Invalid program={}", program);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
const u64 local_size{sph.LocalMemorySize()};
|
|
|
|
ASSERT(local_size <= std::numeric_limits<u32>::max());
|
|
|
|
local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size;
|
|
|
|
texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot;
|
2023-01-03 22:43:57 +01:00
|
|
|
is_propietary_driver = texture_bound == 2;
|
2022-12-25 07:12:57 +01:00
|
|
|
has_hle_engine_state =
|
|
|
|
maxwell3d->engine_state == Tegra::Engines::Maxwell3D::EngineHint::OnHLEMacro;
|
2022-11-05 13:58:44 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
|
|
|
|
const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
|
|
|
|
ASSERT(cbuf.enabled);
|
|
|
|
u32 value{};
|
|
|
|
if (cbuf_offset < cbuf.size) {
|
|
|
|
value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
|
|
|
|
}
|
|
|
|
cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
|
|
|
|
return value;
|
|
|
|
}
|
|
|
|
|
2022-12-25 07:12:57 +01:00
|
|
|
std::optional<Shader::ReplaceConstant> GraphicsEnvironment::GetReplaceConstBuffer(u32 bank,
|
|
|
|
u32 offset) {
|
|
|
|
if (!has_hle_engine_state) {
|
|
|
|
return std::nullopt;
|
|
|
|
}
|
|
|
|
const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset);
|
|
|
|
auto it = maxwell3d->replace_table.find(key);
|
|
|
|
if (it == maxwell3d->replace_table.end()) {
|
|
|
|
return std::nullopt;
|
|
|
|
}
|
2023-01-03 22:43:57 +01:00
|
|
|
const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplacementAttributeType name) {
|
2022-12-25 07:12:57 +01:00
|
|
|
switch (name) {
|
2023-01-03 22:43:57 +01:00
|
|
|
case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::BaseVertex:
|
2022-12-25 07:12:57 +01:00
|
|
|
return Shader::ReplaceConstant::BaseVertex;
|
2023-01-03 22:43:57 +01:00
|
|
|
case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::BaseInstance:
|
2022-12-25 07:12:57 +01:00
|
|
|
return Shader::ReplaceConstant::BaseInstance;
|
2023-01-05 05:15:14 +01:00
|
|
|
case Tegra::Engines::Maxwell3D::HLEReplacementAttributeType::DrawID:
|
|
|
|
return Shader::ReplaceConstant::DrawID;
|
2022-12-25 07:12:57 +01:00
|
|
|
default:
|
|
|
|
UNREACHABLE();
|
|
|
|
}
|
|
|
|
}(it->second);
|
|
|
|
cbuf_replacements.emplace(key, converted_value);
|
|
|
|
return converted_value;
|
|
|
|
}
|
|
|
|
|
2022-11-05 13:58:44 +01:00
|
|
|
Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
|
|
|
|
const auto& regs{maxwell3d->regs};
|
|
|
|
const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding};
|
2022-11-05 19:21:57 +01:00
|
|
|
auto entry =
|
|
|
|
ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle);
|
|
|
|
const Shader::TextureType result{ConvertTextureType(entry)};
|
|
|
|
texture_types.emplace(handle, result);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
Shader::TexturePixelFormat GraphicsEnvironment::ReadTexturePixelFormat(u32 handle) {
|
|
|
|
const auto& regs{maxwell3d->regs};
|
|
|
|
const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding};
|
|
|
|
auto entry =
|
|
|
|
ReadTextureInfo(regs.tex_header.Address(), regs.tex_header.limit, via_header_index, handle);
|
|
|
|
const Shader::TexturePixelFormat result(ConvertTexturePixelFormat(entry));
|
|
|
|
texture_pixel_formats.emplace(handle, result);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
u32 GraphicsEnvironment::ReadViewportTransformState() {
|
|
|
|
const auto& regs{maxwell3d->regs};
|
|
|
|
viewport_transform_state = regs.viewport_scale_offset_enabled;
|
|
|
|
return viewport_transform_state;
|
2022-11-05 13:58:44 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
|
|
|
|
Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
|
|
|
|
u32 start_address_)
|
|
|
|
: GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{
|
|
|
|
&kepler_compute_} {
|
|
|
|
const auto& qmd{kepler_compute->launch_description};
|
|
|
|
stage = Shader::Stage::Compute;
|
|
|
|
local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc;
|
|
|
|
texture_bound = kepler_compute->regs.tex_cb_index;
|
2023-01-03 22:43:57 +01:00
|
|
|
is_propietary_driver = texture_bound == 2;
|
2022-11-05 13:58:44 +01:00
|
|
|
shared_memory_size = qmd.shared_alloc;
|
|
|
|
workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
|
|
|
|
}
|
|
|
|
|
|
|
|
u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
|
|
|
|
const auto& qmd{kepler_compute->launch_description};
|
|
|
|
ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
|
|
|
|
const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
|
|
|
|
u32 value{};
|
|
|
|
if (cbuf_offset < cbuf.size) {
|
|
|
|
value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
|
|
|
|
}
|
|
|
|
cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
|
|
|
|
return value;
|
|
|
|
}
|
|
|
|
|
|
|
|
Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
|
|
|
|
const auto& regs{kepler_compute->regs};
|
|
|
|
const auto& qmd{kepler_compute->launch_description};
|
2022-11-05 19:21:57 +01:00
|
|
|
auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
|
|
|
|
const Shader::TextureType result{ConvertTextureType(entry)};
|
|
|
|
texture_types.emplace(handle, result);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
Shader::TexturePixelFormat ComputeEnvironment::ReadTexturePixelFormat(u32 handle) {
|
|
|
|
const auto& regs{kepler_compute->regs};
|
|
|
|
const auto& qmd{kepler_compute->launch_description};
|
|
|
|
auto entry = ReadTextureInfo(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
|
|
|
|
const Shader::TexturePixelFormat result(ConvertTexturePixelFormat(entry));
|
|
|
|
texture_pixel_formats.emplace(handle, result);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
u32 ComputeEnvironment::ReadViewportTransformState() {
|
|
|
|
return viewport_transform_state;
|
2022-11-05 13:58:44 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void FileEnvironment::Deserialize(std::ifstream& file) {
|
|
|
|
u64 code_size{};
|
|
|
|
u64 num_texture_types{};
|
2022-11-05 19:21:57 +01:00
|
|
|
u64 num_texture_pixel_formats{};
|
2022-11-05 13:58:44 +01:00
|
|
|
u64 num_cbuf_values{};
|
2022-12-25 07:12:57 +01:00
|
|
|
u64 num_cbuf_replacement_values{};
|
2022-11-05 13:58:44 +01:00
|
|
|
file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
|
|
|
|
.read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
|
2022-11-05 19:21:57 +01:00
|
|
|
.read(reinterpret_cast<char*>(&num_texture_pixel_formats),
|
|
|
|
sizeof(num_texture_pixel_formats))
|
2022-11-05 13:58:44 +01:00
|
|
|
.read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
|
2022-12-25 07:12:57 +01:00
|
|
|
.read(reinterpret_cast<char*>(&num_cbuf_replacement_values),
|
|
|
|
sizeof(num_cbuf_replacement_values))
|
2022-11-05 13:58:44 +01:00
|
|
|
.read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
|
|
|
|
.read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
|
|
|
|
.read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
|
|
|
|
.read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
|
|
|
|
.read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
|
2022-11-05 19:21:57 +01:00
|
|
|
.read(reinterpret_cast<char*>(&viewport_transform_state), sizeof(viewport_transform_state))
|
2022-11-05 13:58:44 +01:00
|
|
|
.read(reinterpret_cast<char*>(&stage), sizeof(stage));
|
|
|
|
code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
|
|
|
|
file.read(reinterpret_cast<char*>(code.get()), code_size);
|
|
|
|
for (size_t i = 0; i < num_texture_types; ++i) {
|
|
|
|
u32 key;
|
|
|
|
Shader::TextureType type;
|
|
|
|
file.read(reinterpret_cast<char*>(&key), sizeof(key))
|
|
|
|
.read(reinterpret_cast<char*>(&type), sizeof(type));
|
|
|
|
texture_types.emplace(key, type);
|
|
|
|
}
|
2022-11-05 19:21:57 +01:00
|
|
|
for (size_t i = 0; i < num_texture_pixel_formats; ++i) {
|
|
|
|
u32 key;
|
|
|
|
Shader::TexturePixelFormat format;
|
|
|
|
file.read(reinterpret_cast<char*>(&key), sizeof(key))
|
|
|
|
.read(reinterpret_cast<char*>(&format), sizeof(format));
|
|
|
|
texture_pixel_formats.emplace(key, format);
|
|
|
|
}
|
2022-11-05 13:58:44 +01:00
|
|
|
for (size_t i = 0; i < num_cbuf_values; ++i) {
|
|
|
|
u64 key;
|
|
|
|
u32 value;
|
|
|
|
file.read(reinterpret_cast<char*>(&key), sizeof(key))
|
|
|
|
.read(reinterpret_cast<char*>(&value), sizeof(value));
|
|
|
|
cbuf_values.emplace(key, value);
|
|
|
|
}
|
2022-12-25 07:12:57 +01:00
|
|
|
for (size_t i = 0; i < num_cbuf_replacement_values; ++i) {
|
|
|
|
u64 key;
|
|
|
|
Shader::ReplaceConstant value;
|
|
|
|
file.read(reinterpret_cast<char*>(&key), sizeof(key))
|
|
|
|
.read(reinterpret_cast<char*>(&value), sizeof(value));
|
|
|
|
cbuf_replacements.emplace(key, value);
|
|
|
|
}
|
2022-11-05 13:58:44 +01:00
|
|
|
if (stage == Shader::Stage::Compute) {
|
|
|
|
file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
|
|
|
|
.read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
|
|
|
|
initial_offset = 0;
|
|
|
|
} else {
|
|
|
|
file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
|
|
|
|
initial_offset = sizeof(sph);
|
|
|
|
if (stage == Shader::Stage::Geometry) {
|
|
|
|
file.read(reinterpret_cast<char*>(&gp_passthrough_mask), sizeof(gp_passthrough_mask));
|
|
|
|
}
|
|
|
|
}
|
2023-01-03 22:43:57 +01:00
|
|
|
is_propietary_driver = texture_bound == 2;
|
2022-11-05 13:58:44 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void FileEnvironment::Dump(u64 hash) {
|
|
|
|
DumpImpl(hash, code.get(), read_highest, read_lowest, initial_offset, stage);
|
|
|
|
}
|
|
|
|
|
|
|
|
u64 FileEnvironment::ReadInstruction(u32 address) {
|
|
|
|
if (address < read_lowest || address > read_highest) {
|
|
|
|
throw Shader::LogicError("Out of bounds address {}", address);
|
|
|
|
}
|
|
|
|
return code[(address - read_lowest) / sizeof(u64)];
|
|
|
|
}
|
|
|
|
|
|
|
|
u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
|
|
|
|
const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
|
|
|
|
if (it == cbuf_values.end()) {
|
|
|
|
throw Shader::LogicError("Uncached read texture type");
|
|
|
|
}
|
|
|
|
return it->second;
|
|
|
|
}
|
|
|
|
|
|
|
|
Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
|
|
|
|
const auto it{texture_types.find(handle)};
|
|
|
|
if (it == texture_types.end()) {
|
|
|
|
throw Shader::LogicError("Uncached read texture type");
|
|
|
|
}
|
|
|
|
return it->second;
|
|
|
|
}
|
|
|
|
|
2022-11-05 19:21:57 +01:00
|
|
|
Shader::TexturePixelFormat FileEnvironment::ReadTexturePixelFormat(u32 handle) {
|
|
|
|
const auto it{texture_pixel_formats.find(handle)};
|
|
|
|
if (it == texture_pixel_formats.end()) {
|
|
|
|
throw Shader::LogicError("Uncached read texture pixel format");
|
|
|
|
}
|
|
|
|
return it->second;
|
|
|
|
}
|
|
|
|
|
|
|
|
u32 FileEnvironment::ReadViewportTransformState() {
|
|
|
|
return viewport_transform_state;
|
|
|
|
}
|
|
|
|
|
2022-11-05 13:58:44 +01:00
|
|
|
u32 FileEnvironment::LocalMemorySize() const {
|
|
|
|
return local_memory_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
u32 FileEnvironment::SharedMemorySize() const {
|
|
|
|
return shared_memory_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
u32 FileEnvironment::TextureBoundBuffer() const {
|
|
|
|
return texture_bound;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::array<u32, 3> FileEnvironment::WorkgroupSize() const {
|
|
|
|
return workgroup_size;
|
|
|
|
}
|
|
|
|
|
2022-12-25 07:12:57 +01:00
|
|
|
std::optional<Shader::ReplaceConstant> FileEnvironment::GetReplaceConstBuffer(u32 bank,
|
|
|
|
u32 offset) {
|
|
|
|
const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset);
|
|
|
|
auto it = cbuf_replacements.find(key);
|
|
|
|
if (it == cbuf_replacements.end()) {
|
|
|
|
return std::nullopt;
|
|
|
|
}
|
|
|
|
return it->second;
|
|
|
|
}
|
|
|
|
|
2022-11-05 13:58:44 +01:00
|
|
|
void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
|
|
|
|
const std::filesystem::path& filename, u32 cache_version) try {
|
|
|
|
std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app);
|
|
|
|
file.exceptions(std::ifstream::failbit);
|
|
|
|
if (!file.is_open()) {
|
|
|
|
LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}",
|
|
|
|
Common::FS::PathToUTF8String(filename));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (file.tellp() == 0) {
|
|
|
|
// Write header
|
|
|
|
file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
|
|
|
|
.write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version));
|
|
|
|
}
|
|
|
|
if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
const u32 num_envs{static_cast<u32>(envs.size())};
|
|
|
|
file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
|
|
|
|
for (const GenericEnvironment* const env : envs) {
|
|
|
|
env->Serialize(file);
|
|
|
|
}
|
|
|
|
file.write(key.data(), key.size_bytes());
|
|
|
|
|
|
|
|
} catch (const std::ios_base::failure& e) {
|
|
|
|
LOG_ERROR(Common_Filesystem, "{}", e.what());
|
|
|
|
if (!Common::FS::RemoveFile(filename)) {
|
|
|
|
LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
|
|
|
|
Common::FS::PathToUTF8String(filename));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void LoadPipelines(
|
|
|
|
std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version,
|
|
|
|
Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
|
|
|
|
Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try {
|
|
|
|
std::ifstream file(filename, std::ios::binary | std::ios::ate);
|
|
|
|
if (!file.is_open()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
file.exceptions(std::ifstream::failbit);
|
|
|
|
const auto end{file.tellg()};
|
|
|
|
file.seekg(0, std::ios::beg);
|
|
|
|
|
|
|
|
std::array<char, 8> magic_number;
|
|
|
|
u32 cache_version;
|
|
|
|
file.read(magic_number.data(), magic_number.size())
|
|
|
|
.read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
|
|
|
|
if (magic_number != MAGIC_NUMBER || cache_version != expected_cache_version) {
|
|
|
|
file.close();
|
|
|
|
if (Common::FS::RemoveFile(filename)) {
|
|
|
|
if (magic_number != MAGIC_NUMBER) {
|
|
|
|
LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file");
|
|
|
|
}
|
|
|
|
if (cache_version != expected_cache_version) {
|
|
|
|
LOG_INFO(Common_Filesystem, "Deleting old pipeline cache");
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
LOG_ERROR(Common_Filesystem,
|
|
|
|
"Invalid pipeline cache file and failed to delete it in \"{}\"",
|
|
|
|
Common::FS::PathToUTF8String(filename));
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
while (file.tellg() != end) {
|
|
|
|
if (stop_loading.stop_requested()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
u32 num_envs{};
|
|
|
|
file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
|
|
|
|
std::vector<FileEnvironment> envs(num_envs);
|
|
|
|
for (FileEnvironment& env : envs) {
|
|
|
|
env.Deserialize(file);
|
|
|
|
}
|
|
|
|
if (envs.front().ShaderStage() == Shader::Stage::Compute) {
|
|
|
|
load_compute(file, std::move(envs.front()));
|
|
|
|
} else {
|
|
|
|
load_graphics(file, std::move(envs));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (const std::ios_base::failure& e) {
|
|
|
|
LOG_ERROR(Common_Filesystem, "{}", e.what());
|
|
|
|
if (!Common::FS::RemoveFile(filename)) {
|
|
|
|
LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
|
|
|
|
Common::FS::PathToUTF8String(filename));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace VideoCommon
|