Ryujinx/src/Ryujinx.Graphics.Vulkan/BufferManager.cs
riperiperi 492a046335
Vulkan: Buffer Mirrors for MacOS performance (#4899)
* Initial implementation of buffer mirrors

Generally slower right now, goal is to reduce render passes in games that do inline updates

Fix support buffer mirrors

Reintroduce vertex buffer mirror

Add storage buffer support

Optimisation part 1

More optimisation

Avoid useless data copies.

Remove unused cbIndex stuff

Properly set write flag for storage buffers.

Fix minor issues

Not sure why this was here.

Fix BufferRangeList

Fix some big issues

Align storage buffers rather than getting full buffer as a range

Improves mirrorability of read-only storage buffers

Increase staging buffer size, as it now contains mirrors

Fix some issues with buffers not updating

Fix buffer SetDataUnchecked offset for one of the paths when using mirrors

Fix buffer mirrors interaction with buffer textures

Fix mirror rebinding

Move GetBuffer calls on indirect draws before BeginRenderPass to avoid draws without render pass

Fix mirrors rebase

Fix rebase 2023

* Fix crash when using stale vertex buffer

Similar to `Get` with a size that's too large, just treat it as a clamp.

* Explicitly set support buffer as mirrorable

* Address feedback

* Remove unused fragment of MVK workaround

* Replace logging for staging buffer OOM

* Address format issues

* Address more format issues

* Mini cleanup

* Address more things

* Rename BufferRangeList

* Support bounding range for ClearMirrors and UploadPendingData

* Add maximum size for vertex buffer mirrors

* Enable index buffer mirrors

Enabled on all platforms for the IbStreamer.

* Feedback

* Remove mystery BufferCache change

Probably macos related?

* Fix mirrors not creating when staging buffer is empty.

* Change log level to debug
2023-08-14 14:18:47 -03:00

540 lines
19 KiB
C#

using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Silk.NET.Vulkan;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using VkBuffer = Silk.NET.Vulkan.Buffer;
using VkFormat = Silk.NET.Vulkan.Format;
namespace Ryujinx.Graphics.Vulkan
{
class BufferManager : IDisposable
{
public const MemoryPropertyFlags DefaultBufferMemoryFlags =
MemoryPropertyFlags.HostVisibleBit |
MemoryPropertyFlags.HostCoherentBit |
MemoryPropertyFlags.HostCachedBit;
// Some drivers don't expose a "HostCached" memory type,
// so we need those alternative flags for the allocation to succeed there.
private const MemoryPropertyFlags DefaultBufferMemoryNoCacheFlags =
MemoryPropertyFlags.HostVisibleBit |
MemoryPropertyFlags.HostCoherentBit;
private const MemoryPropertyFlags DeviceLocalBufferMemoryFlags =
MemoryPropertyFlags.DeviceLocalBit;
private const MemoryPropertyFlags DeviceLocalMappedBufferMemoryFlags =
MemoryPropertyFlags.DeviceLocalBit |
MemoryPropertyFlags.HostVisibleBit |
MemoryPropertyFlags.HostCoherentBit;
private const BufferUsageFlags DefaultBufferUsageFlags =
BufferUsageFlags.TransferSrcBit |
BufferUsageFlags.TransferDstBit |
BufferUsageFlags.UniformTexelBufferBit |
BufferUsageFlags.StorageTexelBufferBit |
BufferUsageFlags.UniformBufferBit |
BufferUsageFlags.StorageBufferBit |
BufferUsageFlags.IndexBufferBit |
BufferUsageFlags.VertexBufferBit |
BufferUsageFlags.TransformFeedbackBufferBitExt;
private const BufferUsageFlags HostImportedBufferUsageFlags =
BufferUsageFlags.TransferSrcBit |
BufferUsageFlags.TransferDstBit;
private readonly Device _device;
private readonly IdList<BufferHolder> _buffers;
public int BufferCount { get; private set; }
public StagingBuffer StagingBuffer { get; }
public MemoryRequirements HostImportedBufferMemoryRequirements { get; }
public BufferManager(VulkanRenderer gd, Device device)
{
_device = device;
_buffers = new IdList<BufferHolder>();
StagingBuffer = new StagingBuffer(gd, this);
HostImportedBufferMemoryRequirements = GetHostImportedUsageRequirements(gd);
}
public unsafe BufferHandle CreateHostImported(VulkanRenderer gd, nint pointer, int size)
{
var usage = HostImportedBufferUsageFlags;
if (gd.Capabilities.SupportsIndirectParameters)
{
usage |= BufferUsageFlags.IndirectBufferBit;
}
var bufferCreateInfo = new BufferCreateInfo
{
SType = StructureType.BufferCreateInfo,
Size = (ulong)size,
Usage = usage,
SharingMode = SharingMode.Exclusive,
};
gd.Api.CreateBuffer(_device, in bufferCreateInfo, null, out var buffer).ThrowOnError();
(Auto<MemoryAllocation> allocation, ulong offset) = gd.HostMemoryAllocator.GetExistingAllocation(pointer, (ulong)size);
gd.Api.BindBufferMemory(_device, buffer, allocation.GetUnsafe().Memory, allocation.GetUnsafe().Offset + offset);
var holder = new BufferHolder(gd, _device, buffer, allocation, size, BufferAllocationType.HostMapped, BufferAllocationType.HostMapped, (int)offset);
BufferCount++;
ulong handle64 = (uint)_buffers.Add(holder);
return Unsafe.As<ulong, BufferHandle>(ref handle64);
}
public BufferHandle CreateWithHandle(
VulkanRenderer gd,
int size,
BufferAllocationType baseType = BufferAllocationType.HostMapped,
BufferHandle storageHint = default,
bool forceMirrors = false)
{
return CreateWithHandle(gd, size, out _, baseType, storageHint, forceMirrors);
}
public BufferHandle CreateWithHandle(
VulkanRenderer gd,
int size,
out BufferHolder holder,
BufferAllocationType baseType = BufferAllocationType.HostMapped,
BufferHandle storageHint = default,
bool forceMirrors = false)
{
holder = Create(gd, size, baseType: baseType, storageHint: storageHint);
if (holder == null)
{
return BufferHandle.Null;
}
if (forceMirrors)
{
holder.UseMirrors();
}
BufferCount++;
ulong handle64 = (uint)_buffers.Add(holder);
return Unsafe.As<ulong, BufferHandle>(ref handle64);
}
public unsafe MemoryRequirements GetHostImportedUsageRequirements(VulkanRenderer gd)
{
var usage = HostImportedBufferUsageFlags;
if (gd.Capabilities.SupportsIndirectParameters)
{
usage |= BufferUsageFlags.IndirectBufferBit;
}
var bufferCreateInfo = new BufferCreateInfo
{
SType = StructureType.BufferCreateInfo,
Size = (ulong)Environment.SystemPageSize,
Usage = usage,
SharingMode = SharingMode.Exclusive,
};
gd.Api.CreateBuffer(_device, in bufferCreateInfo, null, out var buffer).ThrowOnError();
gd.Api.GetBufferMemoryRequirements(_device, buffer, out var requirements);
gd.Api.DestroyBuffer(_device, buffer, null);
return requirements;
}
public unsafe (VkBuffer buffer, MemoryAllocation allocation, BufferAllocationType resultType) CreateBacking(
VulkanRenderer gd,
int size,
BufferAllocationType type,
bool forConditionalRendering = false,
BufferAllocationType fallbackType = BufferAllocationType.Auto)
{
var usage = DefaultBufferUsageFlags;
if (forConditionalRendering && gd.Capabilities.SupportsConditionalRendering)
{
usage |= BufferUsageFlags.ConditionalRenderingBitExt;
}
else if (gd.Capabilities.SupportsIndirectParameters)
{
usage |= BufferUsageFlags.IndirectBufferBit;
}
var bufferCreateInfo = new BufferCreateInfo
{
SType = StructureType.BufferCreateInfo,
Size = (ulong)size,
Usage = usage,
SharingMode = SharingMode.Exclusive,
};
gd.Api.CreateBuffer(_device, in bufferCreateInfo, null, out var buffer).ThrowOnError();
gd.Api.GetBufferMemoryRequirements(_device, buffer, out var requirements);
MemoryAllocation allocation;
do
{
var allocateFlags = type switch
{
BufferAllocationType.HostMappedNoCache => DefaultBufferMemoryNoCacheFlags,
BufferAllocationType.HostMapped => DefaultBufferMemoryFlags,
BufferAllocationType.DeviceLocal => DeviceLocalBufferMemoryFlags,
BufferAllocationType.DeviceLocalMapped => DeviceLocalMappedBufferMemoryFlags,
_ => DefaultBufferMemoryFlags,
};
// If an allocation with this memory type fails, fall back to the previous one.
try
{
allocation = gd.MemoryAllocator.AllocateDeviceMemory(requirements, allocateFlags, true);
}
catch (VulkanException)
{
allocation = default;
}
}
while (allocation.Memory.Handle == 0 && (--type != fallbackType));
if (allocation.Memory.Handle == 0UL)
{
gd.Api.DestroyBuffer(_device, buffer, null);
return default;
}
gd.Api.BindBufferMemory(_device, buffer, allocation.Memory, allocation.Offset);
return (buffer, allocation, type);
}
public BufferHolder Create(
VulkanRenderer gd,
int size,
bool forConditionalRendering = false,
BufferAllocationType baseType = BufferAllocationType.HostMapped,
BufferHandle storageHint = default)
{
BufferAllocationType type = baseType;
BufferHolder storageHintHolder = null;
if (baseType == BufferAllocationType.Auto)
{
if (gd.IsSharedMemory)
{
baseType = BufferAllocationType.HostMapped;
type = baseType;
}
else
{
type = size >= BufferHolder.DeviceLocalSizeThreshold ? BufferAllocationType.DeviceLocal : BufferAllocationType.HostMapped;
}
if (storageHint != BufferHandle.Null)
{
if (TryGetBuffer(storageHint, out storageHintHolder))
{
type = storageHintHolder.DesiredType;
}
}
}
(VkBuffer buffer, MemoryAllocation allocation, BufferAllocationType resultType) =
CreateBacking(gd, size, type, forConditionalRendering);
if (buffer.Handle != 0)
{
var holder = new BufferHolder(gd, _device, buffer, allocation, size, baseType, resultType);
if (storageHintHolder != null)
{
holder.InheritMetrics(storageHintHolder);
}
return holder;
}
Logger.Error?.Print(LogClass.Gpu, $"Failed to create buffer with size 0x{size:X} and type \"{baseType}\".");
return null;
}
public Auto<DisposableBufferView> CreateView(BufferHandle handle, VkFormat format, int offset, int size, Action invalidateView)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.CreateView(format, offset, size, invalidateView);
}
return null;
}
public Auto<DisposableBuffer> GetBuffer(CommandBuffer commandBuffer, BufferHandle handle, bool isWrite, bool isSSBO = false)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.GetBuffer(commandBuffer, isWrite, isSSBO);
}
return null;
}
public Auto<DisposableBuffer> GetBuffer(CommandBuffer commandBuffer, BufferHandle handle, int offset, int size, bool isWrite)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.GetBuffer(commandBuffer, offset, size, isWrite);
}
return null;
}
public Auto<DisposableBuffer> GetBufferI8ToI16(CommandBufferScoped cbs, BufferHandle handle, int offset, int size)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.GetBufferI8ToI16(cbs, offset, size);
}
return null;
}
public Auto<DisposableBuffer> GetAlignedVertexBuffer(CommandBufferScoped cbs, BufferHandle handle, int offset, int size, int stride, int alignment)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.GetAlignedVertexBuffer(cbs, offset, size, stride, alignment);
}
return null;
}
public Auto<DisposableBuffer> GetBufferTopologyConversion(CommandBufferScoped cbs, BufferHandle handle, int offset, int size, IndexBufferPattern pattern, int indexSize)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.GetBufferTopologyConversion(cbs, offset, size, pattern, indexSize);
}
return null;
}
public (Auto<DisposableBuffer>, Auto<DisposableBuffer>) GetBufferTopologyConversionIndirect(
VulkanRenderer gd,
CommandBufferScoped cbs,
BufferRange indexBuffer,
BufferRange indirectBuffer,
BufferRange drawCountBuffer,
IndexBufferPattern pattern,
int indexSize,
bool hasDrawCount,
int maxDrawCount,
int indirectDataStride)
{
BufferHolder drawCountBufferHolder = null;
if (!TryGetBuffer(indexBuffer.Handle, out var indexBufferHolder) ||
!TryGetBuffer(indirectBuffer.Handle, out var indirectBufferHolder) ||
(hasDrawCount && !TryGetBuffer(drawCountBuffer.Handle, out drawCountBufferHolder)))
{
return (null, null);
}
var indexBufferKey = new TopologyConversionIndirectCacheKey(
gd,
pattern,
indexSize,
indirectBufferHolder,
indirectBuffer.Offset,
indirectBuffer.Size);
bool hasConvertedIndexBuffer = indexBufferHolder.TryGetCachedConvertedBuffer(
indexBuffer.Offset,
indexBuffer.Size,
indexBufferKey,
out var convertedIndexBuffer);
var indirectBufferKey = new IndirectDataCacheKey(pattern);
bool hasConvertedIndirectBuffer = indirectBufferHolder.TryGetCachedConvertedBuffer(
indirectBuffer.Offset,
indirectBuffer.Size,
indirectBufferKey,
out var convertedIndirectBuffer);
var drawCountBufferKey = new DrawCountCacheKey();
bool hasCachedDrawCount = true;
if (hasDrawCount)
{
hasCachedDrawCount = drawCountBufferHolder.TryGetCachedConvertedBuffer(
drawCountBuffer.Offset,
drawCountBuffer.Size,
drawCountBufferKey,
out _);
}
if (!hasConvertedIndexBuffer || !hasConvertedIndirectBuffer || !hasCachedDrawCount)
{
// The destination index size is always I32.
int indexCount = indexBuffer.Size / indexSize;
int convertedCount = pattern.GetConvertedCount(indexCount);
if (!hasConvertedIndexBuffer)
{
convertedIndexBuffer = Create(gd, convertedCount * 4);
indexBufferKey.SetBuffer(convertedIndexBuffer.GetBuffer());
indexBufferHolder.AddCachedConvertedBuffer(indexBuffer.Offset, indexBuffer.Size, indexBufferKey, convertedIndexBuffer);
}
if (!hasConvertedIndirectBuffer)
{
convertedIndirectBuffer = Create(gd, indirectBuffer.Size);
indirectBufferHolder.AddCachedConvertedBuffer(indirectBuffer.Offset, indirectBuffer.Size, indirectBufferKey, convertedIndirectBuffer);
}
gd.PipelineInternal.EndRenderPass();
gd.HelperShader.ConvertIndexBufferIndirect(
gd,
cbs,
indirectBufferHolder,
convertedIndirectBuffer,
drawCountBuffer,
indexBufferHolder,
convertedIndexBuffer,
pattern,
indexSize,
indexBuffer.Offset,
indexBuffer.Size,
indirectBuffer.Offset,
hasDrawCount,
maxDrawCount,
indirectDataStride);
// Any modification of the indirect buffer should invalidate the index buffers that are associated with it,
// since we used the indirect data to find the range of the index buffer that is used.
var indexBufferDependency = new Dependency(
indexBufferHolder,
indexBuffer.Offset,
indexBuffer.Size,
indexBufferKey);
indirectBufferHolder.AddCachedConvertedBufferDependency(
indirectBuffer.Offset,
indirectBuffer.Size,
indirectBufferKey,
indexBufferDependency);
if (hasDrawCount)
{
if (!hasCachedDrawCount)
{
drawCountBufferHolder.AddCachedConvertedBuffer(drawCountBuffer.Offset, drawCountBuffer.Size, drawCountBufferKey, null);
}
// If we have a draw count, any modification of the draw count should invalidate all indirect buffers
// where we used it to find the range of indirect data that is actually used.
var indirectBufferDependency = new Dependency(
indirectBufferHolder,
indirectBuffer.Offset,
indirectBuffer.Size,
indirectBufferKey);
drawCountBufferHolder.AddCachedConvertedBufferDependency(
drawCountBuffer.Offset,
drawCountBuffer.Size,
drawCountBufferKey,
indirectBufferDependency);
}
}
return (convertedIndexBuffer.GetBuffer(), convertedIndirectBuffer.GetBuffer());
}
public Auto<DisposableBuffer> GetBuffer(CommandBuffer commandBuffer, BufferHandle handle, bool isWrite, out int size)
{
if (TryGetBuffer(handle, out var holder))
{
size = holder.Size;
return holder.GetBuffer(commandBuffer, isWrite);
}
size = 0;
return null;
}
public PinnedSpan<byte> GetData(BufferHandle handle, int offset, int size)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.GetData(offset, size);
}
return new PinnedSpan<byte>();
}
public void SetData<T>(BufferHandle handle, int offset, ReadOnlySpan<T> data) where T : unmanaged
{
SetData(handle, offset, MemoryMarshal.Cast<T, byte>(data), null, null);
}
public void SetData(BufferHandle handle, int offset, ReadOnlySpan<byte> data, CommandBufferScoped? cbs, Action endRenderPass)
{
if (TryGetBuffer(handle, out var holder))
{
holder.SetData(offset, data, cbs, endRenderPass);
}
}
public void Delete(BufferHandle handle)
{
if (TryGetBuffer(handle, out var holder))
{
holder.Dispose();
_buffers.Remove((int)Unsafe.As<BufferHandle, ulong>(ref handle));
}
}
private bool TryGetBuffer(BufferHandle handle, out BufferHolder holder)
{
return _buffers.TryGetValue((int)Unsafe.As<BufferHandle, ulong>(ref handle), out holder);
}
protected virtual void Dispose(bool disposing)
{
if (disposing)
{
foreach (BufferHolder buffer in _buffers)
{
buffer.Dispose();
}
_buffers.Clear();
StagingBuffer.Dispose();
}
}
public void Dispose()
{
Dispose(true);
}
}
}