Use a bitmap to do granular tracking for buffer uploads.

This path is only taken if the much faster check of "is the buffer rented at all" is triggered, so it doesn't actually end up costing too much, and the time saved by not ending render passes (and, on the GPU, by not waiting on barriers) is probably helpful.

Avoids ending render passes to update buffer data (not all the time)
- 140-180 to 35-45 in SMO Metro Kingdom (these updates are in the UI)
- Very variable 60-150(!) to 16-25 in Mario Kart 8 (these updates are in the UI)

As well as allowing more data to be preloaded persistently, this will also allow more data to be loaded in the preload buffer, which should be faster as it doesn't need to insert barriers between draws (and, on TBDR GPUs, does not need to flush and reload tile memory).

Improves performance in GPU-limited scenarios. Should notably improve performance on TBDR GPUs. Still a lot more to do here.
This commit is contained in:
riperiperi 2022-03-12 16:31:20 +00:00
parent 3e88558182
commit af4aae7951
5 changed files with 200 additions and 17 deletions

View file

@ -2,7 +2,9 @@
{
struct BitMap
{
private const int IntSize = 64;
public const int IntSize = 64;
private const int IntShift = 6;
private const int IntMask = IntSize - 1;
private readonly long[] _masks;
@ -27,7 +29,7 @@
public bool IsSet(int bit)
{
int wordIndex = bit / IntSize;
int wordIndex = bit >> IntShift;
int wordBit = bit & IntMask;
long wordMask = 1L << wordBit;
@ -35,10 +37,51 @@
return (_masks[wordIndex] & wordMask) != 0;
}
/// <summary>
/// Checks whether any bit in the inclusive range [start, end] is set.
/// </summary>
/// <param name="start">Index of the first bit to test</param>
/// <param name="end">Index of the last bit to test (inclusive)</param>
/// <returns>True if at least one bit in the range is set, false otherwise</returns>
public bool IsSet(int start, int end)
{
    if (start == end)
    {
        return IsSet(start);
    }

    int firstWord = start >> IntShift;
    int lastWord = end >> IntShift;

    // Selects the start bit and every bit above it within the first word.
    long headMask = -1L << (start & IntMask);

    // Selects the end bit and every bit below it within the last word.
    long tailMask = (long)(ulong.MaxValue >> (IntMask - (end & IntMask)));

    if (firstWord == lastWord)
    {
        // The whole range lives in one word: both masks apply at once.
        return (_masks[firstWord] & headMask & tailMask) != 0;
    }

    if ((_masks[firstWord] & headMask) != 0)
    {
        return true;
    }

    // Words strictly between the first and last are covered entirely.
    for (int word = firstWord + 1; word < lastWord; word++)
    {
        if (_masks[word] != 0)
        {
            return true;
        }
    }

    return (_masks[lastWord] & tailMask) != 0;
}
public bool Set(int bit)
{
int wordIndex = bit / IntSize;
int wordBit = bit & IntMask;
int wordIndex = bit >> IntShift;
int wordBit = bit & IntMask;
long wordMask = 1L << wordBit;
@ -52,14 +95,63 @@
return true;
}
/// <summary>
/// Sets every bit in the inclusive range [start, end].
/// </summary>
/// <param name="start">Index of the first bit to set</param>
/// <param name="end">Index of the last bit to set (inclusive)</param>
public void SetRange(int start, int end)
{
    if (start == end)
    {
        Set(start);
        return;
    }

    int firstWord = start >> IntShift;
    int lastWord = end >> IntShift;

    // Selects the start bit and every bit above it within the first word.
    long headMask = -1L << (start & IntMask);

    // Selects the end bit and every bit below it within the last word.
    long tailMask = (long)(ulong.MaxValue >> (IntMask - (end & IntMask)));

    if (firstWord == lastWord)
    {
        // The whole range lives in one word: only the intersection is set.
        _masks[firstWord] |= headMask & tailMask;
        return;
    }

    _masks[firstWord] |= headMask;

    // Words strictly between the first and last become fully set.
    int word = firstWord + 1;
    while (word < lastWord)
    {
        _masks[word] = -1L;
        word++;
    }

    _masks[lastWord] |= tailMask;
}
/// <summary>
/// Clears a single bit, leaving every other bit untouched.
/// </summary>
/// <param name="bit">Index of the bit to clear</param>
public void Clear(int bit)
{
int wordIndex = bit / IntSize;
int wordIndex = bit >> IntShift;
// Position of the bit inside its 64-bit word.
int wordBit = bit & IntMask;
long wordMask = 1L << wordBit;
// AND with the inverted mask so only the target bit is reset.
_masks[wordIndex] &= ~wordMask;
}
/// <summary>
/// Resets every bit in the bitmap.
/// </summary>
public void Clear()
{
    int wordCount = _masks.Length;

    for (int word = 0; word != wordCount; word++)
    {
        _masks[word] = 0L;
    }
}
/// <summary>
/// Resets whole backing words, from word index <paramref name="start"/> to
/// word index <paramref name="end"/> inclusive. The indices address the
/// backing array, not individual bits.
/// </summary>
/// <param name="start">Index of the first word to reset</param>
/// <param name="end">Index of the last word to reset (inclusive)</param>
public void ClearInt(int start, int end)
{
    int word = start;

    while (word <= end)
    {
        _masks[word] = 0L;
        word++;
    }
}
}
}

View file

@ -43,7 +43,7 @@ namespace Ryujinx.Graphics.Vulkan
_device = device;
_allocation = allocation;
_allocationAuto = new Auto<MemoryAllocation>(allocation);
_waitable = new MultiFenceHolder();
_waitable = new MultiFenceHolder(size);
_buffer = new Auto<DisposableBuffer>(new DisposableBuffer(gd.Api, device, buffer), _waitable, _allocationAuto);
_bufferHandle = buffer.Handle;
Size = size;
@ -162,6 +162,8 @@ namespace Ryujinx.Graphics.Vulkan
throw new InvalidOperationException("The buffer is not host mapped.");
}
public static int SlowLoads = 0;
public unsafe void SetData(int offset, ReadOnlySpan<byte> data, CommandBufferScoped? cbs = null, Action endRenderPass = null)
{
int dataSize = Math.Min(data.Length, Size - offset);
@ -173,9 +175,11 @@ namespace Ryujinx.Graphics.Vulkan
if (_map != IntPtr.Zero)
{
// If persistently mapped, set the data directly if the buffer is not currently in use.
// TODO: Reintroduce waitable & granular use tracking.
// bool needsFlush = _gd.CommandBufferPool.HasWaitableOnRentedCommandBuffer(_waitable, offset, dataSize);
bool needsFlush = _buffer.HasRentedCommandBufferDependency(_gd.CommandBufferPool);
bool isRented = _buffer.HasRentedCommandBufferDependency(_gd.CommandBufferPool);
// If the buffer is rented, take a little more time and check if the use overlaps this handle.
bool needsFlush = isRented && _waitable.IsBufferRangeInUse(offset, dataSize);
if (!needsFlush)
{
@ -187,7 +191,7 @@ namespace Ryujinx.Graphics.Vulkan
}
}
if (cbs != null && !_buffer.HasCommandBufferDependency(cbs.Value))
if (cbs != null && !(_buffer.HasCommandBufferDependency(cbs.Value) && _waitable.IsBufferRangeInUse(cbs.Value.CommandBufferIndex, offset, dataSize)))
{
// If the buffer hasn't been used on the command buffer yet, try to preload the data.
// This avoids ending and beginning render passes on each buffer data upload.

View file

@ -0,0 +1,61 @@
namespace Ryujinx.Graphics.Vulkan
{
/// <summary>
/// Tracks which regions of a buffer are used by each command buffer.
/// The buffer is divided into fixed-size granules; every command buffer owns
/// its own word-aligned run of bits in a shared bitmap, one bit per granule.
/// </summary>
internal class BufferUsageBitmap
{
    private BitMap _bitmap;
    private readonly int _size;
    private readonly int _granularity;

    private readonly int _intsPerCb;
    private readonly int _bitsPerCb;

    /// <summary>
    /// Creates a usage bitmap for a buffer.
    /// </summary>
    /// <param name="size">Size of the tracked buffer</param>
    /// <param name="granularity">Size of the region represented by a single bit</param>
    public BufferUsageBitmap(int size, int granularity)
    {
        _size = size;
        _granularity = granularity;

        // One bit per granule, rounded up so a partial trailing granule is tracked too.
        int bits = (size + (granularity - 1)) / granularity;

        // Round each command buffer's run up to whole words so Clear can reset it word by word.
        _intsPerCb = (bits + (BitMap.IntSize - 1)) / BitMap.IntSize;
        _bitsPerCb = _intsPerCb * BitMap.IntSize;

        _bitmap = new BitMap(_bitsPerCb * CommandBufferPool.MaxCommandBuffers);
    }

    /// <summary>
    /// Marks a region of the buffer as used by the given command buffer.
    /// Empty regions are ignored, and regions extending past the end of the buffer are truncated.
    /// </summary>
    /// <param name="cbIndex">Index of the command buffer using the region</param>
    /// <param name="offset">Offset of the region within the buffer</param>
    /// <param name="size">Size of the region</param>
    public void Add(int cbIndex, int offset, int size)
    {
        if (TryGetBitRange(cbIndex, offset, size, out int start, out int end))
        {
            _bitmap.SetRange(start, end);
        }
    }

    /// <summary>
    /// Checks whether a region of the buffer overlaps any region used by the given command buffer.
    /// </summary>
    /// <param name="cbIndex">Index of the command buffer to check</param>
    /// <param name="offset">Offset of the region within the buffer</param>
    /// <param name="size">Size of the region</param>
    /// <returns>True if a used granule intersects the region; always false for an empty region</returns>
    public bool OverlapsWith(int cbIndex, int offset, int size)
    {
        return TryGetBitRange(cbIndex, offset, size, out int start, out int end) && _bitmap.IsSet(start, end);
    }

    /// <summary>
    /// Checks whether a region of the buffer overlaps any region used by any command buffer.
    /// </summary>
    /// <param name="offset">Offset of the region within the buffer</param>
    /// <param name="size">Size of the region</param>
    /// <returns>True if any command buffer uses a granule intersecting the region</returns>
    public bool OverlapsWith(int offset, int size)
    {
        for (int i = 0; i < CommandBufferPool.MaxCommandBuffers; i++)
        {
            if (OverlapsWith(i, offset, size))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Forgets all usage recorded for the given command buffer.
    /// </summary>
    /// <param name="cbIndex">Index of the command buffer to clear</param>
    public void Clear(int cbIndex)
    {
        _bitmap.ClearInt(cbIndex * _intsPerCb, (cbIndex + 1) * _intsPerCb - 1);
    }

    /// <summary>
    /// Converts a byte region into the inclusive bit range it occupies inside a command buffer's run.
    /// </summary>
    /// <param name="cbIndex">Index of the command buffer that owns the run</param>
    /// <param name="offset">Offset of the region within the buffer (expected to be non-negative)</param>
    /// <param name="size">Size of the region</param>
    /// <param name="start">First bit of the range</param>
    /// <param name="end">Last bit of the range (inclusive)</param>
    /// <returns>False if the region is empty after truncation to the buffer size, true otherwise</returns>
    private bool TryGetBitRange(int cbIndex, int offset, int size, out int start, out int end)
    {
        start = 0;
        end = 0;

        // Truncate regions that run past the end of the buffer, so they cannot spill into the
        // bits owned by the next command buffer. The sum is widened to avoid integer overflow.
        if ((long)offset + size > _size)
        {
            size = _size - offset;
        }

        // An empty region touches no granule. Without this check, (offset + size - 1) / granularity
        // could produce an end bit that lies before the start bit.
        if (size <= 0)
        {
            return false;
        }

        int cbBase = cbIndex * _bitsPerCb;

        start = cbBase + offset / _granularity;
        end = cbBase + (offset + size - 1) / _granularity;

        return true;
    }
}
}

View file

@ -10,8 +10,10 @@ namespace Ryujinx.Graphics.Vulkan
/// </summary>
class MultiFenceHolder
{
private static int BufferUsageTrackingGranularity = 4096;
private readonly Dictionary<FenceHolder, int> _fences;
private BufferRangeList _rangeList;
private BufferUsageBitmap _bufferUsageBitmap;
/// <summary>
/// Creates a new instance of the multiple fence holder.
@ -19,15 +21,27 @@ namespace Ryujinx.Graphics.Vulkan
public MultiFenceHolder()
{
_fences = new Dictionary<FenceHolder, int>();
_rangeList.Initialize();
}
/// <summary>
/// Creates a new instance of the multiple fence holder for a buffer of the given size.
/// When granular buffer tracking is enabled, a usage bitmap covering the buffer is allocated.
/// </summary>
/// <param name="size">Size of the buffer</param>
public MultiFenceHolder(int size)
{
    if (VulkanConfiguration.UseGranularBufferTracking)
    {
        _bufferUsageBitmap = new BufferUsageBitmap(size, BufferUsageTrackingGranularity);
    }

    _fences = new Dictionary<FenceHolder, int>();
}
public void AddBufferUse(int cbIndex, int offset, int size)
{
if (VulkanConfiguration.UseGranularBufferTracking)
{
_rangeList.Add(cbIndex, offset, size);
_bufferUsageBitmap.Add(cbIndex, offset, size);
}
}
@ -35,7 +49,7 @@ namespace Ryujinx.Graphics.Vulkan
{
if (VulkanConfiguration.UseGranularBufferTracking)
{
_rangeList.Clear(cbIndex);
_bufferUsageBitmap?.Clear(cbIndex);
}
}
@ -43,7 +57,19 @@ namespace Ryujinx.Graphics.Vulkan
{
if (VulkanConfiguration.UseGranularBufferTracking)
{
return _rangeList.OverlapsWith(cbIndex, offset, size);
return _bufferUsageBitmap.OverlapsWith(cbIndex, offset, size);
}
else
{
return true;
}
}
public bool IsBufferRangeInUse(int offset, int size)
{
if (VulkanConfiguration.UseGranularBufferTracking)
{
return _bufferUsageBitmap.OverlapsWith(offset, size);
}
else
{
@ -167,7 +193,7 @@ namespace Ryujinx.Graphics.Vulkan
var fence = kv.Key;
var ownerCbIndex = kv.Value;
if (_rangeList.OverlapsWith(ownerCbIndex, offset, size))
if (_bufferUsageBitmap.OverlapsWith(ownerCbIndex, offset, size))
{
return true;
}
@ -191,7 +217,7 @@ namespace Ryujinx.Graphics.Vulkan
var fence = kv.Key;
var ownerCbIndex = kv.Value;
if (_rangeList.OverlapsWith(ownerCbIndex, offset, size))
if (_bufferUsageBitmap.OverlapsWith(ownerCbIndex, offset, size))
{
overlapping.Add(fence);
}

View file

@ -5,7 +5,7 @@
public const bool UseDynamicState = true;
public const bool UseFastBufferUpdates = true;
public const bool UseGranularBufferTracking = false;
public const bool UseGranularBufferTracking = true;
public const bool UseSlowSafeBlitOnAmd = true;
public const bool ForceD24S8Unsupported = false;