diff --git a/Ryujinx.Cpu/MemoryManager.cs b/Ryujinx.Cpu/MemoryManager.cs
index 348ca2bd2..cef201265 100644
--- a/Ryujinx.Cpu/MemoryManager.cs
+++ b/Ryujinx.Cpu/MemoryManager.cs
@@ -131,7 +131,7 @@ namespace Ryujinx.Cpu
         /// <exception cref="InvalidMemoryRegionException">Throw for unhandled invalid or unmapped memory accesses</exception>
         public T Read<T>(ulong va) where T : unmanaged
         {
-            return MemoryMarshal.Cast<byte, T>(GetSpan(va, Unsafe.SizeOf<T>()))[0];
+            return MemoryMarshal.Cast<byte, T>(GetSpan(va, Unsafe.SizeOf<T>(), true))[0];
         }
 
         /// <summary>
diff --git a/Ryujinx.Cpu/Tracking/CpuMultiRegionHandle.cs b/Ryujinx.Cpu/Tracking/CpuMultiRegionHandle.cs
index f76410b4b..8204a13eb 100644
--- a/Ryujinx.Cpu/Tracking/CpuMultiRegionHandle.cs
+++ b/Ryujinx.Cpu/Tracking/CpuMultiRegionHandle.cs
@@ -18,6 +18,7 @@ namespace Ryujinx.Cpu.Tracking
         public void QueryModified(Action<ulong, ulong> modifiedAction) => _impl.QueryModified(modifiedAction);
         public void QueryModified(ulong address, ulong size, Action<ulong, ulong> modifiedAction) => _impl.QueryModified(address, size, modifiedAction);
         public void QueryModified(ulong address, ulong size, Action<ulong, ulong> modifiedAction, int sequenceNumber) => _impl.QueryModified(address, size, modifiedAction, sequenceNumber);
+        public void RegisterAction(ulong address, ulong size, RegionSignal action) => _impl.RegisterAction(address, size, action);
         public void SignalWrite() => _impl.SignalWrite();
     }
 }
diff --git a/Ryujinx.Cpu/Tracking/CpuSmartMultiRegionHandle.cs b/Ryujinx.Cpu/Tracking/CpuSmartMultiRegionHandle.cs
index ddeeab0ae..e38babfc5 100644
--- a/Ryujinx.Cpu/Tracking/CpuSmartMultiRegionHandle.cs
+++ b/Ryujinx.Cpu/Tracking/CpuSmartMultiRegionHandle.cs
@@ -15,6 +15,7 @@ namespace Ryujinx.Cpu.Tracking
         }
 
         public void Dispose() => _impl.Dispose();
+        public void RegisterAction(RegionSignal action) => _impl.RegisterAction(action);
         public void QueryModified(Action<ulong, ulong> modifiedAction) => _impl.QueryModified(modifiedAction);
         public void QueryModified(ulong address, ulong size, Action<ulong, ulong> modifiedAction) => _impl.QueryModified(address, size, modifiedAction);
         public void QueryModified(ulong address, ulong size, Action<ulong, ulong> modifiedAction, int sequenceNumber) => _impl.QueryModified(address, size, modifiedAction, sequenceNumber);
diff --git a/Ryujinx.Graphics.GAL/IRenderer.cs b/Ryujinx.Graphics.GAL/IRenderer.cs
index 465c88053..d03cb4c01 100644
--- a/Ryujinx.Graphics.GAL/IRenderer.cs
+++ b/Ryujinx.Graphics.GAL/IRenderer.cs
@@ -21,6 +21,8 @@ namespace Ryujinx.Graphics.GAL
         ISampler CreateSampler(SamplerCreateInfo info);
         ITexture CreateTexture(TextureCreateInfo info, float scale);
 
+        void CreateSync(ulong id);
+
         void DeleteBuffer(BufferHandle buffer);
 
         byte[] GetBufferData(BufferHandle buffer, int offset, int size);
@@ -39,6 +41,8 @@ namespace Ryujinx.Graphics.GAL
 
         void ResetCounter(CounterType type);
 
+        void WaitSync(ulong id);
+
         void Initialize(GraphicsDebugLevel logLevel);
     }
 }
diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute.cs b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
index fd3114a79..c7e059ba3 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Compute.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
@@ -97,7 +97,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
 
                 SbDescriptor sbDescriptor = _context.PhysicalMemory.Read<SbDescriptor>(sbDescAddress);
 
-                BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size);
+                BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
             }
 
             BufferManager.SetComputeStorageBufferBindings(info.SBuffers);
diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs
index 0e87aa3d2..84d353502 100644
--- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs
@@ -39,6 +39,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
                 { nameof(GPFifoClassState.Semaphored), new RwCallback(Semaphored, null) },
                 { nameof(GPFifoClassState.Syncpointb), new RwCallback(Syncpointb, null) },
                 { nameof(GPFifoClassState.WaitForIdle), new RwCallback(WaitForIdle, null) },
+                { nameof(GPFifoClassState.SetReference), new RwCallback(SetReference, null) },
                 { nameof(GPFifoClassState.LoadMmeInstructionRam), new RwCallback(LoadMmeInstructionRam, null) },
                 { nameof(GPFifoClassState.LoadMmeStartAddressRam), new RwCallback(LoadMmeStartAddressRam, null) },
                 { nameof(GPFifoClassState.SetMmeShadowRamControl), new RwCallback(SetMmeShadowRamControl, null) }
@@ -136,6 +137,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
             }
             else if (operation == SyncpointbOperation.Incr)
             {
+                _context.CreateHostSyncIfNeeded();
                 _context.Synchronization.IncrementSyncpoint(syncpointId);
             }
 
@@ -150,6 +152,17 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
         {
             _context.Methods.PerformDeferredDraws();
             _context.Renderer.Pipeline.Barrier();
+
+            _context.CreateHostSyncIfNeeded();
+        }
+
+        /// <summary>
+        /// Used as an indirect data barrier on NVN. When used, access to previously written data must be coherent.
+        /// </summary>
+        /// <param name="argument">Method call argument (not read by this handler)</param>
+        public void SetReference(int argument)
+        {
+            _context.CreateHostSyncIfNeeded(); // Only creates a host sync if sync actions are pending
         }
 
         /// <summary>
diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
index 25614a135..d0fcf1421 100644
--- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
@@ -52,7 +52,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
             {
                 if (Words == null)
                 {
-                    Words = MemoryMarshal.Cast<byte, int>(context.MemoryManager.GetSpan(EntryAddress, (int)EntryCount * 4)).ToArray();
+                    Words = MemoryMarshal.Cast<byte, int>(context.MemoryManager.GetSpan(EntryAddress, (int)EntryCount * 4, true)).ToArray();
                 }
             }
         }
diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodIncrementSyncpoint.cs b/Ryujinx.Graphics.Gpu/Engine/MethodIncrementSyncpoint.cs
index 8fcfb9000..9c22275d5 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodIncrementSyncpoint.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MethodIncrementSyncpoint.cs
@@ -13,6 +13,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
         {
             uint syncpointId = (uint)(argument) & 0xFFFF;
 
+            _context.CreateHostSyncIfNeeded();
             _context.Renderer.UpdateCounters(); // Poll the query counters, the game may want an updated result.
             _context.Synchronization.IncrementSyncpoint(syncpointId);
         }
diff --git a/Ryujinx.Graphics.Gpu/Engine/Methods.cs b/Ryujinx.Graphics.Gpu/Engine/Methods.cs
index 9f27aec22..d6bd51106 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Methods.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Methods.cs
@@ -61,6 +61,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
 
             context.MemoryManager.MemoryUnmapped += _counterCache.MemoryUnmappedHandler;
             context.MemoryManager.MemoryUnmapped += TextureManager.MemoryUnmappedHandler;
+            context.MemoryManager.MemoryUnmapped += BufferManager.MemoryUnmappedHandler;
         }
 
         /// <summary>
@@ -333,7 +334,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
 
                     SbDescriptor sbDescriptor = _context.PhysicalMemory.Read<SbDescriptor>(sbDescAddress);
 
-                    BufferManager.SetGraphicsStorageBuffer(stage, sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size);
+                    BufferManager.SetGraphicsStorageBuffer(stage, sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
                 }
             }
         }
diff --git a/Ryujinx.Graphics.Gpu/GpuContext.cs b/Ryujinx.Graphics.Gpu/GpuContext.cs
index 6834afb42..15f757c87 100644
--- a/Ryujinx.Graphics.Gpu/GpuContext.cs
+++ b/Ryujinx.Graphics.Gpu/GpuContext.cs
@@ -4,6 +4,7 @@ using Ryujinx.Graphics.Gpu.Engine.GPFifo;
 using Ryujinx.Graphics.Gpu.Memory;
 using Ryujinx.Graphics.Gpu.Synchronization;
 using System;
+using System.Collections.Generic;
 using System.Threading;
 
 namespace Ryujinx.Graphics.Gpu
@@ -59,6 +60,18 @@ namespace Ryujinx.Graphics.Gpu
         /// </summary>
         internal int SequenceNumber { get; private set; }
 
+        /// <summary>
+        /// Internal sync number, used to denote points at which host synchronization can be requested.
+        /// </summary>
+        internal ulong SyncNumber { get; private set; }
+
+        /// <summary>
+        /// Actions to be performed when a CPU waiting sync point is triggered.
+        /// If there are more than 0 items when this happens, a host sync object will be generated for the given <see cref="SyncNumber"/>,
+        /// and the SyncNumber will be incremented.
+        /// </summary>
+        internal List<Action> SyncActions { get; }
+
         private readonly Lazy<Capabilities> _caps;
 
         /// <summary>
@@ -87,6 +100,8 @@ namespace Ryujinx.Graphics.Gpu
             _caps = new Lazy<Capabilities>(Renderer.GetCapabilities);
 
             HostInitalized = new ManualResetEvent(false);
+
+            SyncActions = new List<Action>();
         }
 
         /// <summary>
@@ -118,6 +133,37 @@ namespace Ryujinx.Graphics.Gpu
             PhysicalMemory = new PhysicalMemory(cpuMemory);
         }
 
+        /// <summary>
+        /// Registers an action to be performed the next time <see cref="CreateHostSyncIfNeeded"/> is called.
+        /// Having at least one registered action makes that call create a host sync object and increment <see cref="SyncNumber"/>.
+        /// </summary>
+        /// <param name="action">The action to be performed on sync object creation</param>
+        public void RegisterSyncAction(Action action)
+        {
+            SyncActions.Add(action);
+        }
+
+        /// <summary>
+        /// Creates a host sync object for the current <see cref="SyncNumber"/> if there are any pending sync actions, increments it, then calls the actions.
+        /// If no actions are present, nothing happens: a host sync object is not created and the SyncNumber is unchanged.
+        /// </summary>
+        public void CreateHostSyncIfNeeded()
+        {
+            if (SyncActions.Count > 0)
+            {
+                Renderer.CreateSync(SyncNumber);
+
+                SyncNumber++; // Incremented before the actions run, so they observe the next sync number
+
+                foreach (Action action in SyncActions)
+                {
+                    action();
+                }
+
+                SyncActions.Clear(); // Actions are one-shot; they must be registered again to run on a later sync
+            }
+        }
+
         /// <summary>
         /// Disposes all GPU resources currently cached.
         /// It's an error to push any GPU commands after disposal.
diff --git a/Ryujinx.Graphics.Gpu/Memory/Buffer.cs b/Ryujinx.Graphics.Gpu/Memory/Buffer.cs
index bf2452833..7127871a7 100644
--- a/Ryujinx.Graphics.Gpu/Memory/Buffer.cs
+++ b/Ryujinx.Graphics.Gpu/Memory/Buffer.cs
@@ -1,6 +1,7 @@
 using Ryujinx.Cpu.Tracking;
 using Ryujinx.Graphics.GAL;
 using Ryujinx.Memory.Range;
+using Ryujinx.Memory.Tracking;
 using System;
 
 namespace Ryujinx.Graphics.Gpu.Memory
@@ -34,12 +35,28 @@ namespace Ryujinx.Graphics.Gpu.Memory
         /// </summary>
         public ulong EndAddress => Address + Size;
 
+        /// <summary>
+        /// Ranges of the buffer that have been modified on the GPU.
+        /// Ranges defined here cannot be updated from CPU until a CPU waiting sync point is reached.
+        /// Then, write tracking will signal, wait for GPU sync (generated at the syncpoint) and flush these regions.
+        /// </summary>
+        /// <remarks>
+        /// This is null until at least one modification occurs.
+        /// </remarks>
+        private BufferModifiedRangeList _modifiedRanges = null;
+
         private CpuMultiRegionHandle _memoryTrackingGranular;
+
         private CpuRegionHandle _memoryTracking;
+
+        private readonly RegionSignal _externalFlushDelegate;
+        private readonly Action<ulong, ulong> _loadDelegate;
         private readonly Action<ulong, ulong> _modifiedDelegate;
+
         private int _sequenceNumber;
 
         private bool _useGranular;
+        private bool _syncActionRegistered;
 
         /// <summary>
         /// Creates a new instance of the buffer.
@@ -66,6 +83,8 @@ namespace Ryujinx.Graphics.Gpu.Memory
                 _memoryTracking = context.PhysicalMemory.BeginTracking(address, size);
             }
 
+            _externalFlushDelegate = new RegionSignal(ExternalFlush);
+            _loadDelegate = new Action<ulong, ulong>(LoadRegion);
             _modifiedDelegate = new Action<ulong, ulong>(RegionModified);
         }
 
@@ -116,12 +135,131 @@ namespace Ryujinx.Graphics.Gpu.Memory
                 if (_memoryTracking.Dirty && _context.SequenceNumber != _sequenceNumber)
                 {
                     _memoryTracking.Reprotect();
-                    _context.Renderer.SetBufferData(Handle, 0, _context.PhysicalMemory.GetSpan(Address, (int)Size));
+
+                    if (_modifiedRanges != null)
+                    {
+                        _modifiedRanges.ExcludeModifiedRegions(Address, Size, _loadDelegate);
+                    }
+                    else
+                    {
+                        _context.Renderer.SetBufferData(Handle, 0, _context.PhysicalMemory.GetSpan(Address, (int)Size));
+                    }
+                    
                     _sequenceNumber = _context.SequenceNumber;
                 }
             }
         }
 
+        /// <summary>
+        /// Ensure that the modified range list exists, creating it on first use.
+        /// </summary>
+        private void EnsureRangeList()
+        {
+            if (_modifiedRanges == null)
+            {
+                _modifiedRanges = new BufferModifiedRangeList(_context);
+            }
+        }
+
+        /// <summary>
+        /// Signal that the given region of the buffer has been modified, and register a sync action if one is not already pending.
+        /// </summary>
+        /// <param name="address">The start address of the modified region</param>
+        /// <param name="size">The size of the modified region</param>
+        public void SignalModified(ulong address, ulong size)
+        {
+            EnsureRangeList();
+
+            _modifiedRanges.SignalModified(address, size);
+
+            if (!_syncActionRegistered)
+            {
+                _context.RegisterSyncAction(SyncAction);
+                _syncActionRegistered = true; // Avoids registering the same action more than once per sync
+            }
+        }
+
+        /// <summary>
+        /// Indicate that modifications in a given region of this buffer have been overwritten.
+        /// </summary>
+        /// <param name="address">The start address of the region</param>
+        /// <param name="size">The size of the region</param>
+        public void ClearModified(ulong address, ulong size)
+        {
+            if (_modifiedRanges != null)
+            {
+                _modifiedRanges.Clear(address, size);
+            }
+        }
+
+        /// <summary>
+        /// Action to be performed when a syncpoint is reached after modification.
+        /// This will register read/write tracking to flush the buffer from GPU when its memory is used.
+        /// </summary>
+        private void SyncAction()
+        {
+            _syncActionRegistered = false; // Lets the next SignalModified register this action again
+
+            if (_useGranular)
+            {
+                _modifiedRanges.GetRanges(Address, Size, (address, size) =>
+                {
+                    _memoryTrackingGranular.RegisterAction(address, size, _externalFlushDelegate);
+                    SynchronizeMemory(address, size);
+                });
+            }
+            else
+            {
+                _memoryTracking.RegisterAction(_externalFlushDelegate); // Non-granular tracking covers the whole buffer
+                SynchronizeMemory(Address, Size);
+            }
+        }
+
+        /// <summary>
+        /// Inherit modified ranges from another buffer. Does nothing if that buffer has no modified range list.
+        /// </summary>
+        /// <param name="from">The buffer to inherit from</param>
+        public void InheritModifiedRanges(Buffer from)
+        {
+            if (from._modifiedRanges != null)
+            {
+                if (from._syncActionRegistered && !_syncActionRegistered) // Carry over the source's pending sync action, without registering twice
+                {
+                    _context.RegisterSyncAction(SyncAction);
+                    _syncActionRegistered = true;
+                }
+
+                EnsureRangeList();
+                _modifiedRanges.InheritRanges(from._modifiedRanges, (ulong address, ulong size) =>
+                {
+                    if (_useGranular) // The callback registers this buffer's flush action on whichever tracking handle is in use
+                    {
+                        _memoryTrackingGranular.RegisterAction(address, size, _externalFlushDelegate);
+                    }
+                    else
+                    {
+                        _memoryTracking.RegisterAction(_externalFlushDelegate);
+                    }
+                });
+            }
+        }
+
+        /// <summary>
+        /// Determine if a given region of the buffer has been modified, and must be flushed.
+        /// </summary>
+        /// <param name="address">The start address of the region</param>
+        /// <param name="size">The size of the region</param>
+        /// <returns>The result of the modified range list's HasRange for the region, or false if no modification has ever been recorded</returns>
+        public bool IsModified(ulong address, ulong size)
+        {
+            if (_modifiedRanges != null)
+            {
+                return _modifiedRanges.HasRange(address, size);
+            }
+
+            return false;
+        }
+
         /// <summary>
         /// Indicate that a region of the buffer was modified, and must be loaded from memory.
         /// </summary>
@@ -141,6 +279,23 @@ namespace Ryujinx.Graphics.Gpu.Memory
                 mSize = maxSize;
             }
 
+            if (_modifiedRanges != null)
+            {
+                _modifiedRanges.ExcludeModifiedRegions(mAddress, mSize, _loadDelegate);
+            }
+            else
+            {
+                LoadRegion(mAddress, mSize);
+            }
+        }
+
+        /// <summary>
+        /// Load a region of the buffer from memory.
+        /// </summary>
+        /// <param name="mAddress">Start address of the modified region</param>
+        /// <param name="mSize">Size of the modified region</param>
+        private void LoadRegion(ulong mAddress, ulong mSize)
+        {
             int offset = (int)(mAddress - Address);
 
             _context.Renderer.SetBufferData(Handle, offset, _context.PhysicalMemory.GetSpan(mAddress, (int)mSize));
@@ -172,15 +327,62 @@ namespace Ryujinx.Graphics.Gpu.Memory
             _context.PhysicalMemory.WriteUntracked(address, data);
         }
 
+        /// <summary>
+        /// Align a given address and size region to page boundaries, expanding it outwards (assumes PageMask is the page size minus one).
+        /// </summary>
+        /// <param name="address">The start address of the region</param>
+        /// <param name="size">The size of the region</param>
+        /// <returns>The page aligned address and size</returns>
+        private static (ulong address, ulong size) PageAlign(ulong address, ulong size)
+        {
+            ulong pageMask = MemoryManager.PageMask;
+            ulong rA = address & ~pageMask; // Round the start address down
+            ulong rS = ((address + size + pageMask) & ~pageMask) - rA; // Round the end address up, then measure from the aligned start
+            return (rA, rS);
+        }
+
+        /// <summary>
+        /// Flush modified ranges of the buffer from another thread.
+        /// This will flush all modifications made before the active SyncNumber was set, and may block to wait for GPU sync.
+        /// </summary>
+        /// <param name="address">Address of the memory action</param>
+        /// <param name="size">Size in bytes</param>
+        public void ExternalFlush(ulong address, ulong size)
+        {
+            _context.Renderer.BackgroundContextAction(() =>
+            {
+                var ranges = _modifiedRanges; // Local copy, so the null check and the use see the same list
+
+                if (ranges != null)
+                {
+                    (address, size) = PageAlign(address, size); // The flushed region is widened to whole pages
+                    ranges.WaitForAndGetRanges(address, size, Flush);
+                }
+            });
+        }
+
+        /// <summary>
+        /// Called when part of the memory for this buffer has been unmapped. Clears the modified ranges in that region, if any exist.
+        /// Calls are from non-GPU threads.
+        /// </summary>
+        /// <param name="address">Start address of the unmapped region</param>
+        /// <param name="size">Size of the unmapped region</param>
+        public void Unmapped(ulong address, ulong size)
+        {
+            _modifiedRanges?.Clear(address, size);
+        }
+
         /// <summary>
         /// Disposes the host buffer.
         /// </summary>
         public void Dispose()
         {
-            _context.Renderer.DeleteBuffer(Handle);
+            _modifiedRanges?.Clear();
 
             _memoryTrackingGranular?.Dispose();
             _memoryTracking?.Dispose();
+
+            _context.Renderer.DeleteBuffer(Handle);
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferBounds.cs b/Ryujinx.Graphics.Gpu/Memory/BufferBounds.cs
index 060171fb0..5569b9470 100644
--- a/Ryujinx.Graphics.Gpu/Memory/BufferBounds.cs
+++ b/Ryujinx.Graphics.Gpu/Memory/BufferBounds.cs
@@ -1,3 +1,5 @@
+using Ryujinx.Graphics.Shader;
+
 namespace Ryujinx.Graphics.Gpu.Memory
 {
     /// <summary>
@@ -15,15 +17,22 @@ namespace Ryujinx.Graphics.Gpu.Memory
         /// </summary>
         public ulong Size { get; }
 
+        /// <summary>
+        /// Buffer usage flags.
+        /// </summary>
+        public BufferUsageFlags Flags { get; }
+
         /// <summary>
         /// Creates a new buffer region.
         /// </summary>
         /// <param name="address">Region address</param>
         /// <param name="size">Region size</param>
-        public BufferBounds(ulong address, ulong size)
+        /// <param name="flags">Buffer usage flags</param>
+        public BufferBounds(ulong address, ulong size, BufferUsageFlags flags = BufferUsageFlags.None)
         {
             Address = address;
             Size = size;
+            Flags = flags;
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs
index 0c6431913..cdcc5a370 100644
--- a/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs
+++ b/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs
@@ -68,9 +68,10 @@ namespace Ryujinx.Graphics.Gpu.Memory
             /// <param name="index">Buffer slot</param>
             /// <param name="address">Region virtual address</param>
             /// <param name="size">Region size in bytes</param>
-            public void SetBounds(int index, ulong address, ulong size)
+            /// <param name="flags">Buffer usage flags</param>
+            public void SetBounds(int index, ulong address, ulong size, BufferUsageFlags flags = BufferUsageFlags.None)
             {
-                Buffers[index] = new BufferBounds(address, size);
+                Buffers[index] = new BufferBounds(address, size, flags);
             }
 
             /// <summary>
@@ -219,7 +220,8 @@ namespace Ryujinx.Graphics.Gpu.Memory
         /// <param name="index">Index of the storage buffer</param>
         /// <param name="gpuVa">Start GPU virtual address of the buffer</param>
         /// <param name="size">Size in bytes of the storage buffer</param>
-        public void SetComputeStorageBuffer(int index, ulong gpuVa, ulong size)
+        /// <param name="flags">Buffer usage flags</param>
+        public void SetComputeStorageBuffer(int index, ulong gpuVa, ulong size, BufferUsageFlags flags)
         {
             size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1);
 
@@ -227,7 +229,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
 
             ulong address = TranslateAndCreateBuffer(gpuVa, size);
 
-            _cpStorageBuffers.SetBounds(index, address, size);
+            _cpStorageBuffers.SetBounds(index, address, size, flags);
         }
 
         /// <summary>
@@ -238,7 +240,8 @@ namespace Ryujinx.Graphics.Gpu.Memory
         /// <param name="index">Index of the storage buffer</param>
         /// <param name="gpuVa">Start GPU virtual address of the buffer</param>
         /// <param name="size">Size in bytes of the storage buffer</param>
-        public void SetGraphicsStorageBuffer(int stage, int index, ulong gpuVa, ulong size)
+        /// <param name="flags">Buffer usage flags</param>
+        public void SetGraphicsStorageBuffer(int stage, int index, ulong gpuVa, ulong size, BufferUsageFlags flags)
         {
             size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1);
 
@@ -252,7 +255,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
                 _gpStorageBuffersDirty = true;
             }
 
-            _gpStorageBuffers[stage].SetBounds(index, address, size);
+            _gpStorageBuffers[stage].SetBounds(index, address, size, flags);
         }
 
         /// <summary>
@@ -385,6 +388,30 @@ namespace Ryujinx.Graphics.Gpu.Memory
             return mask;
         }
 
+        /// <summary>
+        /// Handles a memory region being unmapped, by notifying every buffer that overlaps the region.
+        /// </summary>
+        /// <param name="sender">Sender object</param>
+        /// <param name="e">Event arguments</param>
+        public void MemoryUnmappedHandler(object sender, UnmapEventArgs e)
+        {
+            Buffer[] overlaps = new Buffer[10];
+            int overlapCount;
+
+            ulong address = _context.MemoryManager.Translate(e.Address); // Buffers are looked up by the translated address
+            ulong size = e.Size;
+
+            lock (_buffers)
+            {
+                overlapCount = _buffers.FindOverlaps(address, size, ref overlaps);
+            }
+
+            for (int i = 0; i < overlapCount; i++) // Notification happens outside the lock, on the snapshot of overlaps
+            {
+                overlaps[i].Unmapped(address, size);
+            }
+        }
+
         /// <summary>
         /// Performs address translation of the GPU virtual address, and creates a
         /// new buffer, if needed, for the specified range.
@@ -443,7 +470,12 @@ namespace Ryujinx.Graphics.Gpu.Memory
         /// <param name="size">Size in bytes of the buffer</param>
         private void CreateBufferAligned(ulong address, ulong size)
         {
-            int overlapsCount = _buffers.FindOverlapsNonOverlapping(address, size, ref _bufferOverlaps);
+            int overlapsCount;
+
+            lock (_buffers)
+            {
+                overlapsCount = _buffers.FindOverlapsNonOverlapping(address, size, ref _bufferOverlaps);
+            }
 
             if (overlapsCount != 0)
             {
@@ -463,15 +495,19 @@ namespace Ryujinx.Graphics.Gpu.Memory
                         address    = Math.Min(address,    buffer.Address);
                         endAddress = Math.Max(endAddress, buffer.EndAddress);
 
-                        buffer.SynchronizeMemory(buffer.Address, buffer.Size);
-
-                        _buffers.Remove(buffer);
+                        lock (_buffers)
+                        {
+                            _buffers.Remove(buffer);
+                        }
                     }
 
                     Buffer newBuffer = new Buffer(_context, address, endAddress - address);
                     newBuffer.SynchronizeMemory(address, endAddress - address);
 
-                    _buffers.Add(newBuffer);
+                    lock (_buffers)
+                    {
+                        _buffers.Add(newBuffer);
+                    }
 
                     for (int index = 0; index < overlapsCount; index++)
                     {
@@ -479,7 +515,10 @@ namespace Ryujinx.Graphics.Gpu.Memory
 
                         int dstOffset = (int)(buffer.Address - newBuffer.Address);
 
+                        buffer.SynchronizeMemory(buffer.Address, buffer.Size);
+
                         buffer.CopyTo(newBuffer, dstOffset);
+                        newBuffer.InheritModifiedRanges(buffer);
 
                         buffer.Dispose();
                     }
@@ -493,7 +532,10 @@ namespace Ryujinx.Graphics.Gpu.Memory
                 // No overlap, just create a new buffer.
                 Buffer buffer = new Buffer(_context, address, size);
 
-                _buffers.Add(buffer);
+                lock (_buffers)
+                {
+                    _buffers.Add(buffer);
+                }
             }
 
             ShrinkOverlapsBufferIfNeeded();
@@ -549,7 +591,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
 
                 if (bounds.Address != 0)
                 {
-                    sRanges[bindingInfo.Binding] = GetBufferRange(bounds.Address, bounds.Size);
+                    sRanges[bindingInfo.Binding] = GetBufferRange(bounds.Address, bounds.Size, bounds.Flags.HasFlag(BufferUsageFlags.Write));
                 }
             }
 
@@ -722,7 +764,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
 
                     if (bounds.Address != 0)
                     {
-                        ranges[bindingInfo.Binding] = GetBufferRange(bounds.Address, bounds.Size);
+                        ranges[bindingInfo.Binding] = GetBufferRange(bounds.Address, bounds.Size, bounds.Flags.HasFlag(BufferUsageFlags.Write));
                     }
                 }
             }
@@ -818,7 +860,17 @@ namespace Ryujinx.Graphics.Gpu.Memory
                 dstOffset,
                 (int)size);
 
-            dstBuffer.Flush(dstAddress, size);
+            if (srcBuffer.IsModified(srcAddress, size))
+            {
+                dstBuffer.SignalModified(dstAddress, size);
+            }
+            else
+            {
+                // Optimization: If the data being copied is already in memory, then copy it directly instead of flushing from GPU.
+
+                dstBuffer.ClearModified(dstAddress, size);
+                _context.PhysicalMemory.WriteUntracked(dstAddress, _context.PhysicalMemory.GetSpan(srcAddress, (int)size));
+            }
         }
 
         /// <summary>
@@ -840,7 +892,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
 
             _context.Renderer.Pipeline.ClearBuffer(buffer.Handle, offset, (int)size, value);
 
-            buffer.Flush(address, size);
+            buffer.SignalModified(address, size);
         }
 
         /// <summary>
@@ -848,10 +900,11 @@ namespace Ryujinx.Graphics.Gpu.Memory
         /// </summary>
         /// <param name="address">Start address of the memory range</param>
         /// <param name="size">Size in bytes of the memory range</param>
+        /// <param name="write">Whether the buffer will be written to by this use</param>
         /// <returns>The buffer sub-range for the given range</returns>
-        private BufferRange GetBufferRange(ulong address, ulong size)
+        private BufferRange GetBufferRange(ulong address, ulong size, bool write = false)
         {
-            return GetBuffer(address, size).GetRange(address, size);
+            return GetBuffer(address, size, write).GetRange(address, size);
         }
 
         /// <summary>
@@ -860,20 +913,32 @@ namespace Ryujinx.Graphics.Gpu.Memory
         /// </summary>
         /// <param name="address">Start address of the memory range</param>
         /// <param name="size">Size in bytes of the memory range</param>
+        /// <param name="write">Whether the buffer will be written to by this use</param>
         /// <returns>The buffer where the range is fully contained</returns>
-        private Buffer GetBuffer(ulong address, ulong size)
+        private Buffer GetBuffer(ulong address, ulong size, bool write = false)
         {
             Buffer buffer;
 
             if (size != 0)
             {
-                buffer = _buffers.FindFirstOverlap(address, size);
+                lock (_buffers)
+                {
+                    buffer = _buffers.FindFirstOverlap(address, size);
+                }
 
                 buffer.SynchronizeMemory(address, size);
+
+                if (write)
+                {
+                    buffer.SignalModified(address, size);
+                }
             }
             else
             {
-                buffer = _buffers.FindFirstOverlap(address, 1);
+                lock (_buffers)
+                {
+                    buffer = _buffers.FindFirstOverlap(address, 1);
+                }
             }
 
             return buffer;
@@ -888,7 +953,12 @@ namespace Ryujinx.Graphics.Gpu.Memory
         {
             if (size != 0)
             {
-                Buffer buffer = _buffers.FindFirstOverlap(address, size);
+                Buffer buffer;
+
+                lock (_buffers)
+                {
+                    buffer = _buffers.FindFirstOverlap(address, size);
+                }
 
                 buffer.SynchronizeMemory(address, size);
             }
@@ -900,9 +970,12 @@ namespace Ryujinx.Graphics.Gpu.Memory
         /// </summary>
         public void Dispose()
         {
-            foreach (Buffer buffer in _buffers)
+            lock (_buffers)
             {
-                buffer.Dispose();
+                foreach (Buffer buffer in _buffers)
+                {
+                    buffer.Dispose();
+                }
             }
         }
     }
diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs b/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs
new file mode 100644
index 000000000..594dd0664
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs
@@ -0,0 +1,367 @@
+using Ryujinx.Memory.Range;
+using System;
+using System.Linq;
+
+namespace Ryujinx.Graphics.Gpu.Memory
+{
+    /// <summary>
+    /// A range within a buffer that has been modified by the GPU.
+    /// </summary>
+    class BufferModifiedRange : IRange
+    {
+        /// <summary>
+        /// Start address of the range in guest memory.
+        /// </summary>
+        public ulong Address { get; }
+
+        /// <summary>
+        /// Size of the range in bytes.
+        /// </summary>
+        public ulong Size { get; }
+
+        /// <summary>
+        /// Exclusive end address of the range in guest memory (<see cref="Address"/> + <see cref="Size"/>).
+        /// </summary>
+        public ulong EndAddress => Address + Size;
+
+        /// <summary>
+        /// The GPU sync number at the time of the last modification. Settable so an existing range can be refreshed in place.
+        /// </summary>
+        public ulong SyncNumber { get; internal set; }
+
+        /// <summary>
+        /// Creates a new instance of a modified range.
+        /// </summary>
+        /// <param name="address">Start address of the range</param>
+        /// <param name="size">Size of the range in bytes</param>
+        /// <param name="syncNumber">The GPU sync number at the time of creation</param>
+        public BufferModifiedRange(ulong address, ulong size, ulong syncNumber)
+        {
+            Address = address;
+            Size = size;
+            SyncNumber = syncNumber;
+        }
+
+        /// <summary>
+        /// Checks if a given range overlaps with the modified range. Ranges that only touch at an endpoint do not overlap.
+        /// </summary>
+        /// <param name="address">Start address of the range</param>
+        /// <param name="size">Size in bytes of the range</param>
+        /// <returns>True if the range overlaps, false otherwise</returns>
+        public bool OverlapsWith(ulong address, ulong size)
+        {
+            return Address < address + size && address < EndAddress;
+        }
+    }
+
+    /// <summary>
+    /// A structure used to track GPU modified ranges within a buffer.
+    /// </summary>
+    class BufferModifiedRangeList : RangeList<BufferModifiedRange>
+    {
+        private readonly GpuContext _context;
+
+        private readonly object _lock = new object();
+
+        // The list can be accessed from both the GPU thread, and a background thread, so each gets its own overlap scratch array.
+        private BufferModifiedRange[] _foregroundOverlaps = new BufferModifiedRange[1];
+        private BufferModifiedRange[] _backgroundOverlaps = new BufferModifiedRange[1];
+
+        /// <summary>
+        /// Creates a new instance of a modified range list.
+        /// </summary>
+        /// <param name="context">GPU context that the buffer range list belongs to</param>
+        public BufferModifiedRangeList(GpuContext context)
+        {
+            _context = context;
+        }
+
+        /// <summary>
+        /// Given an input range, calls the given action with sub-ranges which exclude any of the modified regions. The action runs while the list lock is held.
+        /// </summary>
+        /// <param name="address">Start address of the query range</param>
+        /// <param name="size">Size of the query range in bytes</param>
+        /// <param name="action">Action to perform for each remaining sub-range of the input range (address, size)</param>
+        public void ExcludeModifiedRegions(ulong address, ulong size, Action<ulong, ulong> action)
+        {
+            lock (_lock)
+            {
+                // Slice the query region using the modified regions in the list, walking them in the order returned (assumed ascending by address).
+                int count = FindOverlapsNonOverlapping(address, size, ref _foregroundOverlaps);
+
+                for (int i = 0; i < count; i++)
+                {
+                    BufferModifiedRange overlap = _foregroundOverlaps[i];
+
+                    if (overlap.Address > address)
+                    {
+                        // The start of the remaining region is uncovered by this overlap. Call the action for it.
+                        action(address, overlap.Address - address);
+                    }
+
+                    // Remaining region is after this overlap. The unsigned size can wrap when the overlap ends past the query; see the signed check below.
+                    size -= overlap.EndAddress - address;
+                    address = overlap.EndAddress;
+                }
+
+                if ((long)size > 0)
+                {
+                    // Something is left after the last overlap, signal it. (A wrapped size reads as negative above and is skipped.)
+                    action(address, size);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Signal that a region of the buffer has been modified, and add the new region to the range list, stamped with the context's current sync number.
+        /// Existing ranges that overlap are removed; any part of them lying outside the new region is re-added with its original sync number.
+        /// </summary>
+        /// <param name="address">Start address of the modified region</param>
+        /// <param name="size">Size of the modified region in bytes</param>
+        public void SignalModified(ulong address, ulong size)
+        {
+            // Must lock, as this can affect flushes from the background thread.
+            lock (_lock)
+            {
+                // We may overlap with some existing modified regions. They must be cut into by the new entry.
+                int count = FindOverlapsNonOverlapping(address, size, ref _foregroundOverlaps);
+
+                ulong endAddress = address + size;
+                ulong syncNumber = _context.SyncNumber;
+
+                for (int i = 0; i < count; i++)
+                {
+                    // The overlaps must be removed or split.
+
+                    BufferModifiedRange overlap = _foregroundOverlaps[i];
+
+                    if (overlap.Address == address && overlap.Size == size)
+                    {
+                        // Region already exists. Just update the existing sync number, nothing needs to be added.
+                        overlap.SyncNumber = syncNumber;
+
+                        return;
+                    }
+
+                    Remove(overlap);
+
+                    if (overlap.Address < address && overlap.EndAddress > address)
+                    {
+                        // The overlap starts before the new region: keep its leading part as a split item.
+
+                        Add(new BufferModifiedRange(overlap.Address, address - overlap.Address, overlap.SyncNumber));
+                    }
+
+                    if (overlap.Address < endAddress && overlap.EndAddress > endAddress)
+                    {
+                        // The overlap ends after the new region: keep its trailing part as a split item.
+
+                        Add(new BufferModifiedRange(endAddress, overlap.EndAddress - endAddress, overlap.SyncNumber));
+                    }
+                }
+
+                Add(new BufferModifiedRange(address, size, syncNumber));
+            }
+        }
+
+        /// <summary>
+        /// Gets modified ranges within the specified region, and then fires the given action for each range individually, after the lock is released.
+        /// </summary>
+        /// <param name="address">Start address to query</param>
+        /// <param name="size">Size to query</param>
+        /// <param name="rangeAction">The action to call for each modified range (full range address and size, not clamped to the query)</param>
+        public void GetRanges(ulong address, ulong size, Action<ulong, ulong> rangeAction)
+        {
+            int count = 0;
+
+            // Range list must be consistent for this operation. Only the lookup is locked; results are read from the foreground array afterwards.
+            lock (_lock)
+            {
+                count = FindOverlapsNonOverlapping(address, size, ref _foregroundOverlaps);
+            }
+
+            for (int i = 0; i < count; i++)
+            {
+                BufferModifiedRange overlap = _foregroundOverlaps[i];
+                rangeAction(overlap.Address, overlap.Size);
+            }
+        }
+
+        /// <summary>
+        /// Queries if any modified range overlaps the specified region.
+        /// </summary>
+        /// <param name="address">Start address to query</param>
+        /// <param name="size">Size to query</param>
+        /// <returns>True if a range exists in the specified region, false otherwise</returns>
+        public bool HasRange(ulong address, ulong size)
+        {
+            // Range list must be consistent for this operation. The lookup writes its results into the foreground overlap array.
+            lock (_lock)
+            {
+                return FindOverlapsNonOverlapping(address, size, ref _foregroundOverlaps) > 0;
+            }
+        }
+
+        /// <summary>
+        /// Gets modified ranges within the specified region, waits on ones from a previous sync number,
+        /// and then fires the given action for each such range, clamped to the queried region.
+        /// </summary>
+        /// <remarks>
+        /// This function assumes it is called from the background thread (it uses the background overlap array).
+        /// Modifications from the current sync number are ignored because the guest should not expect them to be available yet.
+        /// They will remain reserved, so that any data sync prioritizes the data in the GPU.
+        /// </remarks>
+        /// <param name="address">Start address to query</param>
+        /// <param name="size">Size to query</param>
+        /// <param name="rangeAction">The action to call for each modified range</param>
+        public void WaitForAndGetRanges(ulong address, ulong size, Action<ulong, ulong> rangeAction)
+        {
+            ulong endAddress = address + size;
+            ulong currentSync = _context.SyncNumber;
+
+            int rangeCount = 0;
+
+            // Range list must be consistent for this operation. Only the lookup itself is locked here.
+            lock (_lock)
+            {
+                rangeCount = FindOverlapsNonOverlapping(address, size, ref _backgroundOverlaps);
+            }
+
+            if (rangeCount == 0)
+            {
+                return;
+            }
+
+            // First, determine which syncpoint to wait on.
+            // This is the latest syncpoint strictly older than the current sync (signed difference below zero, closest to zero).
+
+            long highestDiff = long.MinValue;
+
+            for (int i = 0; i < rangeCount; i++)
+            {
+                BufferModifiedRange overlap = _backgroundOverlaps[i];
+
+                long diff = (long)(overlap.SyncNumber - currentSync);
+
+                if (diff < 0 && diff > highestDiff)
+                {
+                    highestDiff = diff;
+                }
+            }
+
+            if (highestDiff == long.MinValue)
+            {
+                return;
+            }
+
+            // Wait for the syncpoint. currentSync + highestDiff reconstructs the sync number selected above.
+            _context.Renderer.WaitSync(currentSync + (ulong)highestDiff);
+
+            // Flush and remove all regions at or before the waited syncpoint. NOTE(review): overlaps were captured before the wait; confirm they cannot go stale.
+            lock (_lock)
+            {
+                for (int i = 0; i < rangeCount; i++)
+                {
+                    BufferModifiedRange overlap = _backgroundOverlaps[i];
+
+                    long diff = (long)(overlap.SyncNumber - currentSync);
+
+                    if (diff <= highestDiff)
+                    {
+                        ulong clampAddress = Math.Max(address, overlap.Address);
+                        ulong clampEnd = Math.Min(endAddress, overlap.EndAddress);
+
+                        ClearPart(overlap, clampAddress, clampEnd);
+
+                        rangeAction(clampAddress, clampEnd - clampAddress);
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Inherit ranges from another modified range list: every range of the other list is added to this one as-is.
+        /// </summary>
+        /// <param name="ranges">The range list to inherit from</param>
+        /// <param name="rangeAction">The action to call for each inherited range whose sync number differs from the current one</param>
+        public void InheritRanges(BufferModifiedRangeList ranges, Action<ulong, ulong> rangeAction)
+        {
+            BufferModifiedRange[] inheritRanges;
+            // Snapshot the other list under its own lock; the two locks are never held at the same time.
+            lock (ranges._lock)
+            {
+                inheritRanges = ranges.ToArray();
+            }
+
+            lock (_lock)
+            {
+                foreach (BufferModifiedRange range in inheritRanges)
+                {
+                    Add(range);
+                }
+            }
+
+            ulong currentSync = _context.SyncNumber;
+            foreach (BufferModifiedRange range in inheritRanges)
+            {
+                if (range.SyncNumber != currentSync)
+                {
+                    rangeAction(range.Address, range.Size);
+                }
+            }
+        }
+
+        private void ClearPart(BufferModifiedRange overlap, ulong address, ulong endAddress)
+        {
+            // Removes [address, endAddress) from the given range. Callers in this class invoke it while holding _lock.
+            Remove(overlap);
+
+            // If the overlap extends outside of the clear range, make sure those parts still exist (with the original sync number).
+            if (overlap.Address < address)
+            {
+                Add(new BufferModifiedRange(overlap.Address, address - overlap.Address, overlap.SyncNumber));
+            }
+
+            if (overlap.EndAddress > endAddress)
+            {
+                Add(new BufferModifiedRange(endAddress, overlap.EndAddress - endAddress, overlap.SyncNumber));
+            }
+        }
+
+        /// <summary>
+        /// Clear modified ranges within the specified area. Ranges that only partially overlap it are trimmed rather than removed entirely.
+        /// </summary>
+        /// <param name="address">Start address to clear</param>
+        /// <param name="size">Size to clear</param>
+        public void Clear(ulong address, ulong size)
+        {
+            lock (_lock)
+            {
+                // This function can be called from any thread, so it cannot use the arrays for background or foreground; it allocates its own.
+                BufferModifiedRange[] toClear = new BufferModifiedRange[1];
+
+                int rangeCount = FindOverlapsNonOverlapping(address, size, ref toClear);
+
+                ulong endAddress = address + size;
+
+                for (int i = 0; i < rangeCount; i++)
+                {
+                    BufferModifiedRange overlap = toClear[i];
+
+                    ClearPart(overlap, address, endAddress);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Clear all modified ranges, emptying the underlying item list under the lock.
+        /// </summary>
+        public void Clear()
+        {
+            lock (_lock)
+            {
+                Items.Clear();
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
index 3da22b22f..7021cd209 100644
--- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
+++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs
@@ -61,6 +61,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
         /// </summary>
         /// <param name="va">GPU virtual address where the data is located</param>
         /// <param name="size">Size of the data</param>
+        /// <param name="tracked">True if read tracking is triggered on the span</param>
         /// <returns>The span of the data at the specified memory location</returns>
         public ReadOnlySpan<byte> GetSpan(ulong va, int size, bool tracked = false)
         {
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
index 20e1c9f84..1dbe1805a 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -34,7 +34,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <summary>
         /// Version of the codegen (to be changed when codegen or guest format change).
         /// </summary>
-        private const ulong ShaderCodeGenVersion = 1910;
+        private const ulong ShaderCodeGenVersion = 1790;
 
         /// <summary>
         /// Creates a new instance of the shader cache.
diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs
index acbc24de0..4a3f51bfd 100644
--- a/Ryujinx.Graphics.OpenGL/Renderer.cs
+++ b/Ryujinx.Graphics.OpenGL/Renderer.cs
@@ -26,6 +26,8 @@ namespace Ryujinx.Graphics.OpenGL
         private TextureCopy _backgroundTextureCopy;
         internal TextureCopy TextureCopy => BackgroundContextWorker.InBackground ? _backgroundTextureCopy : _textureCopy;
 
+        private Sync _sync;
+
         internal ResourcePool ResourcePool { get; }
 
         public string GpuVendor { get; private set; }
@@ -39,6 +41,7 @@ namespace Ryujinx.Graphics.OpenGL
             _window = new Window(this);
             _textureCopy = new TextureCopy(this);
             _backgroundTextureCopy = new TextureCopy(this);
+            _sync = new Sync();
             ResourcePool = new ResourcePool();
         }
 
@@ -108,6 +111,7 @@ namespace Ryujinx.Graphics.OpenGL
 
         public void PreFrame()
         {
+            _sync.Cleanup();
             ResourcePool.Tick();
         }
 
@@ -164,6 +168,7 @@ namespace Ryujinx.Graphics.OpenGL
             _pipeline.Dispose();
             _window.Dispose();
             _counters.Dispose();
+            _sync.Dispose();
         }
 
         public IProgram LoadProgramBinary(byte[] programBinary)
@@ -179,5 +184,15 @@ namespace Ryujinx.Graphics.OpenGL
 
             return null;
         }
+
+        public void CreateSync(ulong id)
+        {
+            _sync.Create(id);
+        }
+
+        public void WaitSync(ulong id)
+        {
+            _sync.Wait(id);
+        }
     }
 }
diff --git a/Ryujinx.Graphics.OpenGL/Sync.cs b/Ryujinx.Graphics.OpenGL/Sync.cs
new file mode 100644
index 000000000..97a71fc4b
--- /dev/null
+++ b/Ryujinx.Graphics.OpenGL/Sync.cs
@@ -0,0 +1,161 @@
+using OpenTK.Graphics.OpenGL;
+using Ryujinx.Common.Logging;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Ryujinx.Graphics.OpenGL
+{
+    /// <summary>
+    /// Tracks OpenGL fence sync objects by ID, so that callers can wait for the GPU to reach a given ID.
+    /// </summary>
+    class Sync : IDisposable
+    {
+        /// <summary>
+        /// A GL fence sync object paired with the ID it was created for.
+        /// </summary>
+        private class SyncHandle
+        {
+            public ulong ID;
+            public IntPtr Handle;
+        }
+
+        // IDs below this value belong to syncs that were already deleted by Cleanup.
+        private ulong _firstHandle = 0;
+
+        // Pending syncs in creation order. The list object doubles as the lock guarding itself and _firstHandle.
+        private readonly List<SyncHandle> _handles = new List<SyncHandle>();
+
+        /// <summary>
+        /// Inserts a fence into the GL command stream and registers it under the given ID.
+        /// </summary>
+        /// <param name="id">ID to associate with the new sync object</param>
+        public void Create(ulong id)
+        {
+            SyncHandle handle = new SyncHandle
+            {
+                ID = id,
+                Handle = GL.FenceSync(SyncCondition.SyncGpuCommandsComplete, WaitSyncFlags.None)
+            };
+
+            lock (_handles)
+            {
+                _handles.Add(handle);
+            }
+        }
+
+        /// <summary>
+        /// Blocks until the sync object with the given ID is signalled, logging an error if that takes longer than 1000ms.
+        /// Returns immediately if the ID precedes the first tracked handle, or if no handle with that ID is tracked.
+        /// </summary>
+        /// <param name="id">ID of the sync object to wait on</param>
+        public void Wait(ulong id)
+        {
+            SyncHandle result = null;
+
+            lock (_handles)
+            {
+                // Compare through a signed difference so the check tolerates ulong wrap-around.
+                if ((long)(_firstHandle - id) > 0)
+                {
+                    return; // The handle has already been signalled or deleted.
+                }
+
+                foreach (SyncHandle handle in _handles)
+                {
+                    if (handle.ID == id)
+                    {
+                        result = handle;
+                        break;
+                    }
+                }
+            }
+
+            if (result != null)
+            {
+                // Hold the handle's own lock so Cleanup and Dispose cannot delete the GL object mid-wait.
+                lock (result)
+                {
+                    if (result.Handle == IntPtr.Zero)
+                    {
+                        return;
+                    }
+
+                    // The timeout is expressed in nanoseconds: 1000000000ns = 1000ms.
+                    WaitSyncStatus syncResult = GL.ClientWaitSync(result.Handle, ClientWaitSyncFlags.SyncFlushCommandsBit, 1000000000);
+
+                    if (syncResult == WaitSyncStatus.TimeoutExpired)
+                    {
+                        Logger.Error?.PrintMsg(LogClass.Gpu, $"GL Sync Object {result.ID} failed to signal within 1000ms. Continuing...");
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Deletes sync objects from the front of the list for as long as they report as already signalled.
+        /// </summary>
+        public void Cleanup()
+        {
+            // Iterate through handles and remove any that have already been signalled.
+
+            while (true)
+            {
+                SyncHandle first = null;
+
+                lock (_handles)
+                {
+                    first = _handles.FirstOrDefault();
+                }
+
+                if (first == null)
+                {
+                    break;
+                }
+
+                // A zero timeout polls the fence without blocking.
+                WaitSyncStatus syncResult = GL.ClientWaitSync(first.Handle, ClientWaitSyncFlags.SyncFlushCommandsBit, 0);
+
+                if (syncResult == WaitSyncStatus.AlreadySignaled)
+                {
+                    // Delete the sync object.
+                    lock (_handles)
+                    {
+                        lock (first)
+                        {
+                            _firstHandle = first.ID + 1;
+                            _handles.RemoveAt(0);
+                            GL.DeleteSync(first.Handle);
+                            first.Handle = IntPtr.Zero;
+                        }
+                    }
+                }
+                else
+                {
+                    // This sync handle and any following have not been reached yet.
+                    break;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Deletes every remaining sync object and empties the list.
+        /// </summary>
+        public void Dispose()
+        {
+            lock (_handles)
+            {
+                foreach (SyncHandle handle in _handles)
+                {
+                    lock (handle)
+                    {
+                        GL.DeleteSync(handle.Handle);
+                        handle.Handle = IntPtr.Zero;
+                    }
+                }
+
+                _handles.Clear();
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Shader/BufferDescriptor.cs b/Ryujinx.Graphics.Shader/BufferDescriptor.cs
index 53a4fb164..a3af6e41f 100644
--- a/Ryujinx.Graphics.Shader/BufferDescriptor.cs
+++ b/Ryujinx.Graphics.Shader/BufferDescriptor.cs
@@ -4,11 +4,21 @@ namespace Ryujinx.Graphics.Shader
     {
         public readonly int Binding;
         public readonly int Slot;
+        public BufferUsageFlags Flags;
 
         public BufferDescriptor(int binding, int slot)
         {
             Binding = binding;
             Slot = slot;
+
+            Flags = BufferUsageFlags.None;
+        }
+
+        public BufferDescriptor SetFlag(BufferUsageFlags flag)
+        {
+            Flags |= flag;
+
+            return this;
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/BufferUsageFlags.cs b/Ryujinx.Graphics.Shader/BufferUsageFlags.cs
new file mode 100644
index 000000000..657546cb7
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/BufferUsageFlags.cs
@@ -0,0 +1,18 @@
+using System;
+
+namespace Ryujinx.Graphics.Shader
+{
+    /// <summary>
+    /// Flags that indicate how a buffer will be used in a shader.
+    /// </summary>
+    [Flags]
+    public enum BufferUsageFlags
+    {
+        None = 0,
+
+        /// <summary>
+        /// Buffer is written to.
+        /// </summary>
+        Write = 1 << 0
+    }
+}
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
index 6244f68b6..3bfc06475 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
@@ -298,6 +298,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
 
             string src = TypeConversion.ReinterpretCast(context, src3, srcType, VariableType.U32);
 
+            SetStorageWriteFlag(context, src1, context.Config.Stage);
             string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage);
 
             return $"{sb} = {src}";
@@ -629,6 +630,32 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
             }
         }
 
+        private static void SetStorageWriteFlag(CodeGenContext context, IAstNode indexExpr, ShaderStage stage)
+        {
+            // Attempt to find a BufferDescriptor whose Slot matches the constant index expression.
+            // If the expression is not a constant operand, or no descriptor has that slot, assume it could index any of them,
+            // and set the flag on all storage buffers. NOTE(review): the stage parameter is not used by this method.
+
+            int index = -1;
+
+            if (indexExpr is AstOperand operand && operand.Type == OperandType.Constant)
+            {
+                index = context.SBufferDescriptors.FindIndex(buffer => buffer.Slot == operand.Value);
+            }
+
+            if (index != -1)
+            {
+                context.SBufferDescriptors[index] = context.SBufferDescriptors[index].SetFlag(BufferUsageFlags.Write);
+            }
+            else
+            {
+                for (int i = 0; i < context.SBufferDescriptors.Count; i++)
+                {
+                    context.SBufferDescriptors[i] = context.SBufferDescriptors[i].SetFlag(BufferUsageFlags.Write);
+                }
+            }
+        }
+
         private static string GetStorageBufferAccessor(string slotExpr, string offsetExpr, ShaderStage stage)
         {
             string sbName = OperandManager.GetShaderStagePrefix(stage);
diff --git a/Ryujinx.Memory.Tests/MockVirtualMemoryManager.cs b/Ryujinx.Memory.Tests/MockVirtualMemoryManager.cs
index 62b3ee4a1..0dd2ce461 100644
--- a/Ryujinx.Memory.Tests/MockVirtualMemoryManager.cs
+++ b/Ryujinx.Memory.Tests/MockVirtualMemoryManager.cs
@@ -6,6 +6,8 @@ namespace Ryujinx.Memory.Tests
     {
         public bool NoMappings;
 
+        public event Action<ulong, ulong, MemoryPermission> OnProtect;
+
         public MockVirtualMemoryManager(ulong size, int pageSize)
         {
         }
@@ -82,6 +84,7 @@ namespace Ryujinx.Memory.Tests
 
         public void TrackingReprotect(ulong va, ulong size, MemoryPermission protection)
         {
+            OnProtect?.Invoke(va, size, protection);
         }
     }
 }
diff --git a/Ryujinx.Memory.Tests/TrackingTests.cs b/Ryujinx.Memory.Tests/TrackingTests.cs
index 25c230922..a9cc6df37 100644
--- a/Ryujinx.Memory.Tests/TrackingTests.cs
+++ b/Ryujinx.Memory.Tests/TrackingTests.cs
@@ -421,5 +421,68 @@ namespace Ryujinx.Memory.Tests
 
             Assert.AreEqual((0, 0), _tracking.GetRegionCounts());
         }
+
+        [Test]
+        public void ReadAndWriteProtection()
+        {
+            MemoryPermission protection = MemoryPermission.ReadAndWrite;
+
+            _memoryManager.OnProtect += (va, size, newProtection) =>
+            {
+                Assert.AreEqual((0, PageSize), (va, size)); // Should protect the exact region all the operations use.
+                protection = newProtection;
+            };
+
+            RegionHandle handle = _tracking.BeginTracking(0, PageSize);
+
+            // After creating the handle, there is no protection yet.
+            Assert.AreEqual(MemoryPermission.ReadAndWrite, protection);
+
+            bool dirtyInitial = handle.Dirty;
+            Assert.True(dirtyInitial); // Handle starts dirty.
+
+            handle.Reprotect();
+
+            // After a reprotect, there is write protection, which will set a dirty flag when any write happens.
+            Assert.AreEqual(MemoryPermission.Read, protection);
+
+            (ulong address, ulong size)? readTrackingTriggered = null;
+            handle.RegisterAction((address, size) =>
+            {
+                readTrackingTriggered = (address, size);
+            });
+
+            // Registering an action adds read/write protection.
+            Assert.AreEqual(MemoryPermission.None, protection);
+
+            bool dirtyAfterReprotect = handle.Dirty;
+            Assert.False(dirtyAfterReprotect); // Handle is no longer dirty.
+
+            // First we should read, which will trigger the action. This _should not_ remove write protection on the memory.
+
+            _tracking.VirtualMemoryEvent(0, 4, false);
+
+            bool dirtyAfterRead = handle.Dirty;
+            Assert.False(dirtyAfterRead); // Not dirtied, as this was a read.
+
+            Assert.AreEqual((0UL, 4UL), readTrackingTriggered); // Read action was triggered (expected value first, so failure messages read correctly).
+
+            Assert.AreEqual(MemoryPermission.Read, protection); // Write protection is still present.
+
+            readTrackingTriggered = null;
+
+            // Now, perform a write.
+
+            _tracking.VirtualMemoryEvent(0, 4, true);
+
+            bool dirtyAfterWriteAfterRead = handle.Dirty;
+            Assert.True(dirtyAfterWriteAfterRead); // Should be dirty.
+
+            Assert.AreEqual(MemoryPermission.ReadAndWrite, protection); // All protection is now removed from the memory.
+
+            Assert.IsNull(readTrackingTriggered); // Read tracking was removed when the action fired, as it can only fire once.
+
+            handle.Dispose();
+        }
     }
 }
diff --git a/Ryujinx.Memory/Range/RangeList.cs b/Ryujinx.Memory/Range/RangeList.cs
index 3c8c4c4cd..fd2606563 100644
--- a/Ryujinx.Memory/Range/RangeList.cs
+++ b/Ryujinx.Memory/Range/RangeList.cs
@@ -12,16 +12,16 @@ namespace Ryujinx.Memory.Range
     {
         private const int ArrayGrowthSize = 32;
 
-        private readonly List<T> _items;
+        protected readonly List<T> Items;
 
-        public int Count => _items.Count;
+        public int Count => Items.Count;
 
         /// <summary>
         /// Creates a new range list.
         /// </summary>
         public RangeList()
         {
-            _items = new List<T>();
+            Items = new List<T>();
         }
 
         /// <summary>
@@ -37,7 +37,7 @@ namespace Ryujinx.Memory.Range
                 index = ~index;
             }
 
-            _items.Insert(index, item);
+            Items.Insert(index, item);
         }
 
         /// <summary>
@@ -51,21 +51,21 @@ namespace Ryujinx.Memory.Range
 
             if (index >= 0)
             {
-                while (index > 0 && _items[index - 1].Address == item.Address)
+                while (index > 0 && Items[index - 1].Address == item.Address)
                 {
                     index--;
                 }
 
-                while (index < _items.Count)
+                while (index < Items.Count)
                 {
-                    if (_items[index].Equals(item))
+                    if (Items[index].Equals(item))
                     {
-                        _items.RemoveAt(index);
+                        Items.RemoveAt(index);
 
                         return true;
                     }
 
-                    if (_items[index].Address > item.Address)
+                    if (Items[index].Address > item.Address)
                     {
                         break;
                     }
@@ -110,7 +110,7 @@ namespace Ryujinx.Memory.Range
                 return default(T);
             }
 
-            return _items[index];
+            return Items[index];
         }
 
         /// <summary>
@@ -137,7 +137,7 @@ namespace Ryujinx.Memory.Range
 
             ulong endAddress = address + size;
 
-            foreach (T item in _items)
+            foreach (T item in Items)
             {
                 if (item.Address >= endAddress)
                 {
@@ -196,7 +196,7 @@ namespace Ryujinx.Memory.Range
 
             if (index >= 0)
             {
-                while (index > 0 && _items[index - 1].OverlapsWith(address, size))
+                while (index > 0 && Items[index - 1].OverlapsWith(address, size))
                 {
                     index--;
                 }
@@ -208,9 +208,9 @@ namespace Ryujinx.Memory.Range
                         Array.Resize(ref output, outputIndex + ArrayGrowthSize);
                     }
 
-                    output[outputIndex++] = _items[index++];
+                    output[outputIndex++] = Items[index++];
                 }
-                while (index < _items.Count && _items[index].OverlapsWith(address, size));
+                while (index < Items.Count && Items[index].OverlapsWith(address, size));
             }
 
             return outputIndex;
@@ -230,14 +230,14 @@ namespace Ryujinx.Memory.Range
 
             if (index >= 0)
             {
-                while (index > 0 && _items[index - 1].Address == address)
+                while (index > 0 && Items[index - 1].Address == address)
                 {
                     index--;
                 }
 
-                while (index < _items.Count)
+                while (index < Items.Count)
                 {
-                    T overlap = _items[index++];
+                    T overlap = Items[index++];
 
                     if (overlap.Address != address)
                     {
@@ -264,7 +264,7 @@ namespace Ryujinx.Memory.Range
         private int BinarySearch(ulong address)
         {
             int left  = 0;
-            int right = _items.Count - 1;
+            int right = Items.Count - 1;
 
             while (left <= right)
             {
@@ -272,7 +272,7 @@ namespace Ryujinx.Memory.Range
 
                 int middle = left + (range >> 1);
 
-                T item = _items[middle];
+                T item = Items[middle];
 
                 if (item.Address == address)
                 {
@@ -301,7 +301,7 @@ namespace Ryujinx.Memory.Range
         private int BinarySearch(ulong address, ulong size)
         {
             int left  = 0;
-            int right = _items.Count - 1;
+            int right = Items.Count - 1;
 
             while (left <= right)
             {
@@ -309,7 +309,7 @@ namespace Ryujinx.Memory.Range
 
                 int middle = left + (range >> 1);
 
-                T item = _items[middle];
+                T item = Items[middle];
 
                 if (item.OverlapsWith(address, size))
                 {
@@ -331,12 +331,12 @@ namespace Ryujinx.Memory.Range
 
         public IEnumerator<T> GetEnumerator()
         {
-            return _items.GetEnumerator();
+            return Items.GetEnumerator();
         }
 
         IEnumerator IEnumerable.GetEnumerator()
         {
-            return _items.GetEnumerator();
+            return Items.GetEnumerator();
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Memory/Tracking/MultiRegionHandle.cs b/Ryujinx.Memory/Tracking/MultiRegionHandle.cs
index 02ae3a8bb..df154bc22 100644
--- a/Ryujinx.Memory/Tracking/MultiRegionHandle.cs
+++ b/Ryujinx.Memory/Tracking/MultiRegionHandle.cs
@@ -123,6 +123,17 @@ namespace Ryujinx.Memory.Tracking
             }
         }
 
+        /// <summary>Registers the action on each handle from the one containing address to the one containing address + size - 1.</summary>
+        public void RegisterAction(ulong address, ulong size, RegionSignal action)
+        {
+            int firstIndex = (int)((address - Address) / Granularity);
+            int finalIndex = (int)((address + (size - 1) - Address) / Granularity);
+            for (int index = firstIndex; index <= finalIndex; index++)
+            {
+                _handles[index].RegisterAction(action);
+            }
+        }
+
         public void Dispose()
         {
             foreach (var handle in _handles)
diff --git a/Ryujinx.Memory/Tracking/RegionHandle.cs b/Ryujinx.Memory/Tracking/RegionHandle.cs
index 96898c214..3ddcb6db4 100644
--- a/Ryujinx.Memory/Tracking/RegionHandle.cs
+++ b/Ryujinx.Memory/Tracking/RegionHandle.cs
@@ -24,6 +24,7 @@ namespace Ryujinx.Memory.Tracking
         private readonly MemoryTracking _tracking;
 
         internal MemoryPermission RequiredPermission => _preAction != null ? MemoryPermission.None : (Dirty ? MemoryPermission.ReadAndWrite : MemoryPermission.Read);
+        internal RegionSignal PreAction => _preAction; // Read by SmartMultiRegionHandle so a split can re-register the action on the new handles.
 
         /// <summary>
         /// Create a new region handle. The handle is registered with the given tracking object,
diff --git a/Ryujinx.Memory/Tracking/SmartMultiRegionHandle.cs b/Ryujinx.Memory/Tracking/SmartMultiRegionHandle.cs
index 601884001..8bc10c411 100644
--- a/Ryujinx.Memory/Tracking/SmartMultiRegionHandle.cs
+++ b/Ryujinx.Memory/Tracking/SmartMultiRegionHandle.cs
@@ -41,6 +41,17 @@ namespace Ryujinx.Memory.Tracking
             Dirty = true;
         }
 
+        /// <summary>
+        /// Registers an action on every non-null handle; when invoked, the action is given that handle's own address and size.
+        /// </summary>
+        public void RegisterAction(RegionSignal action)
+        {
+            foreach (var handle in _handles)
+            {
+                handle?.RegisterAction((address, size) => action(handle.Address, handle.Size));
+            }
+        }
+
         public void QueryModified(Action<ulong, ulong> modifiedAction)
         {
             if (!Dirty)
@@ -66,14 +77,23 @@ namespace Ryujinx.Memory.Tracking
             ulong size = HandlesToBytes(splitIndex - handleIndex);
 
             // First, the target handle must be removed. Its data can still be used to determine the new handles.
+            RegionSignal signal = handle.PreAction;
             handle.Dispose();
 
             RegionHandle splitLow = _tracking.BeginTracking(address, size);
             splitLow.Parent = this;
+            if (signal != null)
+            {
+                splitLow.RegisterAction(signal);
+            }
             _handles[handleIndex] = splitLow;
 
             RegionHandle splitHigh = _tracking.BeginTracking(address + size, handle.Size - size);
             splitHigh.Parent = this;
+            if (signal != null)
+            {
+                splitHigh.RegisterAction(signal);
+            }
             _handles[splitIndex] = splitHigh;
         }
 
diff --git a/Ryujinx.Memory/Tracking/VirtualRegion.cs b/Ryujinx.Memory/Tracking/VirtualRegion.cs
index 90fb55d65..15a11568e 100644
--- a/Ryujinx.Memory/Tracking/VirtualRegion.cs
+++ b/Ryujinx.Memory/Tracking/VirtualRegion.cs
@@ -22,12 +22,12 @@ namespace Ryujinx.Memory.Tracking
 
         public override void Signal(ulong address, ulong size, bool write)
         {
-            _tracking.ProtectVirtualRegion(this, MemoryPermission.ReadAndWrite); // Remove our protection immedately.
-
             foreach (var handle in Handles)
             {
                 handle.Signal(address, size, write);
             }
+
+            UpdateProtection();
         }
 
         /// <summary>