From af4aae7951a26490f6b85931ba4c7e39bbe8e64f Mon Sep 17 00:00:00 2001 From: riperiperi Date: Sat, 12 Mar 2022 16:31:20 +0000 Subject: [PATCH] Use a bitmap to do granular tracking for buffer uploads. This path is only taken if the much faster check of "is the buffer rented at all" is triggered, so it doesn't actually end up costing too much, and the time saved by not ending render passes (and, on the GPU, by not waiting on barriers) is probably helpful. Avoids ending render passes to update buffer data (though not in every case): - 140-180 to 35-45 in SMO Metro Kingdom (these updates are in the UI) - Very variable 60-150(!) to 16-25 in Mario Kart 8 (these updates are in the UI) As well as allowing more data to be preloaded persistently, this will also allow more data to be loaded in the preload buffer, which should be faster as it doesn't need to insert barriers between draws (and, on TBDR GPUs, does not need to flush and reload tile memory). Improves performance in GPU-limited scenarios. Should notably improve performance on TBDR GPUs. Still a lot more to do here. 
--- Ryujinx.Graphics.Vulkan/BitMap.cs | 102 +++++++++++++++++- Ryujinx.Graphics.Vulkan/BufferHolder.cs | 12 ++- Ryujinx.Graphics.Vulkan/BufferUsageBitmap.cs | 61 +++++++++++ Ryujinx.Graphics.Vulkan/MultiFenceHolder.cs | 40 +++++-- .../VulkanConfiguration.cs | 2 +- 5 files changed, 200 insertions(+), 17 deletions(-) create mode 100644 Ryujinx.Graphics.Vulkan/BufferUsageBitmap.cs diff --git a/Ryujinx.Graphics.Vulkan/BitMap.cs b/Ryujinx.Graphics.Vulkan/BitMap.cs index 19d852e62..ee3c3c938 100644 --- a/Ryujinx.Graphics.Vulkan/BitMap.cs +++ b/Ryujinx.Graphics.Vulkan/BitMap.cs @@ -2,7 +2,9 @@ { struct BitMap { - private const int IntSize = 64; + public const int IntSize = 64; + + private const int IntShift = 6; private const int IntMask = IntSize - 1; private readonly long[] _masks; @@ -27,7 +29,7 @@ public bool IsSet(int bit) { - int wordIndex = bit / IntSize; + int wordIndex = bit >> IntShift; int wordBit = bit & IntMask; long wordMask = 1L << wordBit; @@ -35,10 +37,51 @@ return (_masks[wordIndex] & wordMask) != 0; } + public bool IsSet(int start, int end) + { + if (start == end) + { + return IsSet(start); + } + + int startIndex = start >> IntShift; + int startBit = start & IntMask; + long startMask = -1L << startBit; + + int endIndex = end >> IntShift; + int endBit = end & IntMask; + long endMask = (long)(ulong.MaxValue >> (IntMask - endBit)); + + if (startIndex == endIndex) + { + return (_masks[startIndex] & startMask & endMask) != 0; + } + + if ((_masks[startIndex] & startMask) != 0) + { + return true; + } + + for (int i = startIndex + 1; i < endIndex; i++) + { + if (_masks[i] != 0) + { + return true; + } + } + + if ((_masks[endIndex] & endMask) != 0) + { + return true; + } + + return false; + } + public bool Set(int bit) { - int wordIndex = bit / IntSize; - int wordBit = bit & IntMask; + int wordIndex = bit >> IntShift; + int wordBit = bit & IntMask; long wordMask = 1L << wordBit; @@ -52,14 +95,63 @@ return true; } + public void SetRange(int start, int end) + { + 
if (start == end) + { + Set(start); + return; + } + + int startIndex = start >> IntShift; + int startBit = start & IntMask; + long startMask = -1L << startBit; + + int endIndex = end >> IntShift; + int endBit = end & IntMask; + long endMask = (long)(ulong.MaxValue >> (IntMask - endBit)); + + if (startIndex == endIndex) + { + _masks[startIndex] |= startMask & endMask; + } + else + { + _masks[startIndex] |= startMask; + + for (int i = startIndex + 1; i < endIndex; i++) + { + _masks[i] |= -1; + } + + _masks[endIndex] |= endMask; + } + } + public void Clear(int bit) { - int wordIndex = bit / IntSize; + int wordIndex = bit >> IntShift; int wordBit = bit & IntMask; long wordMask = 1L << wordBit; _masks[wordIndex] &= ~wordMask; } + + public void Clear() + { + for (int i = 0; i < _masks.Length; i++) + { + _masks[i] = 0; + } + } + + public void ClearInt(int start, int end) + { + for (int i = start; i <= end; i++) + { + _masks[i] = 0; + } + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Vulkan/BufferHolder.cs b/Ryujinx.Graphics.Vulkan/BufferHolder.cs index 6d2bbbc7b..1789f592c 100644 --- a/Ryujinx.Graphics.Vulkan/BufferHolder.cs +++ b/Ryujinx.Graphics.Vulkan/BufferHolder.cs @@ -43,7 +43,7 @@ namespace Ryujinx.Graphics.Vulkan _device = device; _allocation = allocation; _allocationAuto = new Auto(allocation); - _waitable = new MultiFenceHolder(); + _waitable = new MultiFenceHolder(size); _buffer = new Auto(new DisposableBuffer(gd.Api, device, buffer), _waitable, _allocationAuto); _bufferHandle = buffer.Handle; Size = size; @@ -162,6 +162,8 @@ namespace Ryujinx.Graphics.Vulkan throw new InvalidOperationException("The buffer is not host mapped."); } + public static int SlowLoads = 0; + public unsafe void SetData(int offset, ReadOnlySpan data, CommandBufferScoped? 
cbs = null, Action endRenderPass = null) { int dataSize = Math.Min(data.Length, Size - offset); @@ -173,9 +175,11 @@ namespace Ryujinx.Graphics.Vulkan if (_map != IntPtr.Zero) { // If persistently mapped, set the data directly if the buffer is not currently in use. - // TODO: Reintroduce waitable & granular use tracking. // bool needsFlush = _gd.CommandBufferPool.HasWaitableOnRentedCommandBuffer(_waitable, offset, dataSize); - bool needsFlush = _buffer.HasRentedCommandBufferDependency(_gd.CommandBufferPool); + bool isRented = _buffer.HasRentedCommandBufferDependency(_gd.CommandBufferPool); + + // If the buffer is rented, take a little more time and check if the use overlaps this handle. + bool needsFlush = isRented && _waitable.IsBufferRangeInUse(offset, dataSize); if (!needsFlush) { @@ -187,7 +191,7 @@ namespace Ryujinx.Graphics.Vulkan } } - if (cbs != null && !_buffer.HasCommandBufferDependency(cbs.Value)) + if (cbs != null && !(_buffer.HasCommandBufferDependency(cbs.Value) && _waitable.IsBufferRangeInUse(cbs.Value.CommandBufferIndex, offset, dataSize))) { // If the buffer hasn't been used on the command buffer yet, try to preload the data. // This avoids ending and beginning render passes on each buffer data upload. 
diff --git a/Ryujinx.Graphics.Vulkan/BufferUsageBitmap.cs b/Ryujinx.Graphics.Vulkan/BufferUsageBitmap.cs new file mode 100644 index 000000000..1eadd8385 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/BufferUsageBitmap.cs @@ -0,0 +1,61 @@ +namespace Ryujinx.Graphics.Vulkan +{ + internal class BufferUsageBitmap + { + private BitMap _bitmap; + private int _size; + private int _granularity; + private int _bits; + + private int _intsPerCb; + private int _bitsPerCb; + + public BufferUsageBitmap(int size, int granularity) + { + _size = size; + _granularity = granularity; + _bits = (size + (granularity - 1)) / granularity; + + _intsPerCb = (_bits + (BitMap.IntSize - 1)) / BitMap.IntSize; + _bitsPerCb = _intsPerCb * BitMap.IntSize; + + _bitmap = new BitMap(_bitsPerCb * CommandBufferPool.MaxCommandBuffers); + } + + public void Add(int cbIndex, int offset, int size) + { + int cbBase = cbIndex * _bitsPerCb; + int start = cbBase + offset / _granularity; + int end = cbBase + (offset + size - 1) / _granularity; + + _bitmap.SetRange(start, end); + } + + public bool OverlapsWith(int cbIndex, int offset, int size) + { + int cbBase = cbIndex * _bitsPerCb; + int start = cbBase + offset / _granularity; + int end = cbBase + (offset + size - 1) / _granularity; + + return _bitmap.IsSet(start, end); + } + + public bool OverlapsWith(int offset, int size) + { + for (int i = 0; i < CommandBufferPool.MaxCommandBuffers; i++) + { + if (OverlapsWith(i, offset, size)) + { + return true; + } + } + + return false; + } + + public void Clear(int cbIndex) + { + _bitmap.ClearInt(cbIndex * _intsPerCb, (cbIndex + 1) * _intsPerCb - 1); + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/MultiFenceHolder.cs b/Ryujinx.Graphics.Vulkan/MultiFenceHolder.cs index 9e5b7809f..50917680c 100644 --- a/Ryujinx.Graphics.Vulkan/MultiFenceHolder.cs +++ b/Ryujinx.Graphics.Vulkan/MultiFenceHolder.cs @@ -10,8 +10,10 @@ namespace Ryujinx.Graphics.Vulkan /// class MultiFenceHolder { + private static int 
BufferUsageTrackingGranularity = 4096; + private readonly Dictionary _fences; - private BufferRangeList _rangeList; + private BufferUsageBitmap _bufferUsageBitmap; /// /// Creates a new instance of the multiple fence holder. @@ -19,15 +21,27 @@ namespace Ryujinx.Graphics.Vulkan public MultiFenceHolder() { _fences = new Dictionary(); - _rangeList.Initialize(); } + /// + /// Creates a new instance of the multiple fence holder, with a given buffer size in mind. + /// + /// Size of the buffer + public MultiFenceHolder(int size) + { + _fences = new Dictionary(); + + if (VulkanConfiguration.UseGranularBufferTracking) + { + _bufferUsageBitmap = new BufferUsageBitmap(size, BufferUsageTrackingGranularity); + } + } public void AddBufferUse(int cbIndex, int offset, int size) { if (VulkanConfiguration.UseGranularBufferTracking) { - _rangeList.Add(cbIndex, offset, size); + _bufferUsageBitmap.Add(cbIndex, offset, size); } } @@ -35,7 +49,7 @@ namespace Ryujinx.Graphics.Vulkan { if (VulkanConfiguration.UseGranularBufferTracking) { - _rangeList.Clear(cbIndex); + _bufferUsageBitmap?.Clear(cbIndex); } } @@ -43,7 +57,19 @@ namespace Ryujinx.Graphics.Vulkan { if (VulkanConfiguration.UseGranularBufferTracking) { - return _rangeList.OverlapsWith(cbIndex, offset, size); + return _bufferUsageBitmap.OverlapsWith(cbIndex, offset, size); + } + else + { + return true; + } + } + + public bool IsBufferRangeInUse(int offset, int size) + { + if (VulkanConfiguration.UseGranularBufferTracking) + { + return _bufferUsageBitmap.OverlapsWith(offset, size); } else { @@ -167,7 +193,7 @@ namespace Ryujinx.Graphics.Vulkan var fence = kv.Key; var ownerCbIndex = kv.Value; - if (_rangeList.OverlapsWith(ownerCbIndex, offset, size)) + if (_bufferUsageBitmap.OverlapsWith(ownerCbIndex, offset, size)) { return true; } @@ -191,7 +217,7 @@ namespace Ryujinx.Graphics.Vulkan var fence = kv.Key; var ownerCbIndex = kv.Value; - if (_rangeList.OverlapsWith(ownerCbIndex, offset, size)) + if 
(_bufferUsageBitmap.OverlapsWith(ownerCbIndex, offset, size)) { overlapping.Add(fence); } diff --git a/Ryujinx.Graphics.Vulkan/VulkanConfiguration.cs b/Ryujinx.Graphics.Vulkan/VulkanConfiguration.cs index b4d25edee..a91f1330f 100644 --- a/Ryujinx.Graphics.Vulkan/VulkanConfiguration.cs +++ b/Ryujinx.Graphics.Vulkan/VulkanConfiguration.cs @@ -5,7 +5,7 @@ public const bool UseDynamicState = true; public const bool UseFastBufferUpdates = true; - public const bool UseGranularBufferTracking = false; + public const bool UseGranularBufferTracking = true; public const bool UseSlowSafeBlitOnAmd = true; public const bool ForceD24S8Unsupported = false;