From db4242c5dcca2df90374f58ddeb3601c1eb06cce Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 24 Apr 2023 10:28:03 -0300 Subject: [PATCH] Implement DMA texture copy component shuffle (#4717) * Implement DMA texture copy component shuffle * Set UInt24 alignment to 1 --- Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs | 287 ++++++++++++++++---- 1 file changed, 234 insertions(+), 53 deletions(-) diff --git a/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs b/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs index aa94f1f88..fd93cd8ba 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs @@ -6,6 +6,7 @@ using Ryujinx.Graphics.Texture; using System; using System.Collections.Generic; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Runtime.Intrinsics; namespace Ryujinx.Graphics.Gpu.Engine.Dma @@ -32,6 +33,69 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma RemapEnable = 1 << 10 } + /// + /// Texture parameters for copy. + /// + private struct TextureParams + { + /// + /// Copy region X coordinate. + /// + public readonly int RegionX; + + /// + /// Copy region Y coordinate. + /// + public readonly int RegionY; + + /// + /// Offset from the base pointer of the data in memory. + /// + public readonly int BaseOffset; + + /// + /// Bytes per pixel. + /// + public readonly int Bpp; + + /// + /// Whether the texture is linear. If false, the texture is block linear. + /// + public readonly bool Linear; + + /// + /// Pixel offset from XYZ coordinates calculator. + /// + public readonly OffsetCalculator Calculator; + + /// + /// Creates texture parameters. + /// + /// Copy region X coordinate + /// Copy region Y coordinate + /// Offset from the base pointer of the data in memory + /// Bytes per pixel + /// Whether the texture is linear. 
If false, the texture is block linear + /// Pixel offset from XYZ coordinates calculator + public TextureParams(int regionX, int regionY, int baseOffset, int bpp, bool linear, OffsetCalculator calculator) + { + RegionX = regionX; + RegionY = regionY; + BaseOffset = baseOffset; + Bpp = bpp; + Linear = linear; + Calculator = calculator; + } + } + + [StructLayout(LayoutKind.Sequential, Size = 3, Pack = 1)] + private struct UInt24 + { + public byte Byte0; + public byte Byte1; + public byte Byte2; + } + /// /// Creates a new instance of the DMA copy engine class. /// @@ -154,8 +218,10 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma { // Buffer to texture copy. int componentSize = (int)_state.State.SetRemapComponentsComponentSize + 1; - int srcBpp = remap ? ((int)_state.State.SetRemapComponentsNumSrcComponents + 1) * componentSize : 1; - int dstBpp = remap ? ((int)_state.State.SetRemapComponentsNumDstComponents + 1) * componentSize : 1; + int srcComponents = (int)_state.State.SetRemapComponentsNumSrcComponents + 1; + int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1; + int srcBpp = remap ? srcComponents * componentSize : 1; + int dstBpp = remap ? 
dstComponents * componentSize : 1; var dst = Unsafe.As(ref _state.State.SetDstBlockSize); var src = Unsafe.As(ref _state.State.SetSrcBlockSize); @@ -274,63 +340,51 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma } } - unsafe bool Convert(Span dstSpan, ReadOnlySpan srcSpan) where T : unmanaged - { - if (srcLinear && dstLinear && srcBpp == dstBpp) - { - // Optimized path for purely linear copies - we don't need to calculate every single byte offset, - // and we can make use of Span.CopyTo which is very very fast (even compared to pointers) - for (int y = 0; y < yCount; y++) - { - srcCalculator.SetY(srcRegionY + y); - dstCalculator.SetY(dstRegionY + y); - int srcOffset = srcCalculator.GetOffset(srcRegionX); - int dstOffset = dstCalculator.GetOffset(dstRegionX); - srcSpan.Slice(srcOffset - srcBaseOffset, xCount * srcBpp) - .CopyTo(dstSpan.Slice(dstOffset - dstBaseOffset, xCount * dstBpp)); - } - } - else - { - fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan) - { - byte* dstBase = dstPtr - dstBaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset. - byte* srcBase = srcPtr - srcBaseOffset; - - for (int y = 0; y < yCount; y++) - { - srcCalculator.SetY(srcRegionY + y); - dstCalculator.SetY(dstRegionY + y); - - for (int x = 0; x < xCount; x++) - { - int srcOffset = srcCalculator.GetOffset(srcRegionX + x); - int dstOffset = dstCalculator.GetOffset(dstRegionX + x); - - *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset); - } - } - } - } - - return true; - } - // OPT: This allocates a (potentially) huge temporary array and then copies an existing // region of memory into it, data that might get overwritten entirely anyways. 
Ideally this should // all be rewritten to use pooled arrays, but that gets complicated with packed data and strides Span dstSpan = memoryManager.GetSpan(dstGpuVa + (ulong)dstBaseOffset, dstSize).ToArray(); - bool _ = srcBpp switch + TextureParams srcParams = new TextureParams(srcRegionX, srcRegionY, srcBaseOffset, srcBpp, srcLinear, srcCalculator); + TextureParams dstParams = new TextureParams(dstRegionX, dstRegionY, dstBaseOffset, dstBpp, dstLinear, dstCalculator); + + // If remapping is disabled, we always copy the components directly, in order. + // If it's enabled, but the mapping is just XYZW, we also copy them in order. + bool isIdentityRemap = !remap || + (_state.State.SetRemapComponentsDstX == SetRemapComponentsDst.SrcX && + (dstComponents < 2 || _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.SrcY) && + (dstComponents < 3 || _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.SrcZ) && + (dstComponents < 4 || _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.SrcW)); + + if (isIdentityRemap) { - 1 => Convert(dstSpan, srcSpan), - 2 => Convert(dstSpan, srcSpan), - 4 => Convert(dstSpan, srcSpan), - 8 => Convert(dstSpan, srcSpan), - 12 => Convert(dstSpan, srcSpan), - 16 => Convert>(dstSpan, srcSpan), - _ => throw new NotSupportedException($"Unable to copy ${srcBpp} bpp pixel format.") - }; + // The order of the components doesn't change, so we can just copy directly + // (with layout conversion if necessary). 
+ + switch (srcBpp) + { + case 1: Copy(dstSpan, srcSpan, dstParams, srcParams); break; + case 2: Copy(dstSpan, srcSpan, dstParams, srcParams); break; + case 4: Copy(dstSpan, srcSpan, dstParams, srcParams); break; + case 8: Copy(dstSpan, srcSpan, dstParams, srcParams); break; + case 12: Copy(dstSpan, srcSpan, dstParams, srcParams); break; + case 16: Copy>(dstSpan, srcSpan, dstParams, srcParams); break; + default: throw new NotSupportedException($"Unable to copy ${srcBpp} bpp pixel format."); + } + } + else + { + // The order or value of the components might change. + + switch (componentSize) + { + case 1: CopyShuffle(dstSpan, srcSpan, dstParams, srcParams); break; + case 2: CopyShuffle(dstSpan, srcSpan, dstParams, srcParams); break; + case 3: CopyShuffle(dstSpan, srcSpan, dstParams, srcParams); break; + case 4: CopyShuffle(dstSpan, srcSpan, dstParams, srcParams); break; + default: throw new NotSupportedException($"Unable to copy ${componentSize} component size."); + } + } memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan); } @@ -372,6 +426,133 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma } } + /// + /// Copies data from one texture to another, while performing layout conversion if necessary. 
+ /// + /// Pixel type + /// Destination texture memory region + /// Source texture memory region + /// Destination texture parameters + /// Source texture parameters + private unsafe void Copy(Span dstSpan, ReadOnlySpan srcSpan, TextureParams dst, TextureParams src) where T : unmanaged + { + int xCount = (int)_state.State.LineLengthIn; + int yCount = (int)_state.State.LineCount; + + if (src.Linear && dst.Linear && src.Bpp == dst.Bpp) + { + // Optimized path for purely linear copies - we don't need to calculate every single byte offset, + // and we can make use of Span.CopyTo which is very very fast (even compared to pointers). NOTE(review): this branch copies xCount * Bpp bytes per row and does not consult T - confirm that is intended when CopyShuffle calls it with per-component slices + for (int y = 0; y < yCount; y++) + { + src.Calculator.SetY(src.RegionY + y); + dst.Calculator.SetY(dst.RegionY + y); + int srcOffset = src.Calculator.GetOffset(src.RegionX); + int dstOffset = dst.Calculator.GetOffset(dst.RegionX); + srcSpan.Slice(srcOffset - src.BaseOffset, xCount * src.Bpp) + .CopyTo(dstSpan.Slice(dstOffset - dst.BaseOffset, xCount * dst.Bpp)); + } + } + else + { + fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan) + { + byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset. + byte* srcBase = srcPtr - src.BaseOffset; + + for (int y = 0; y < yCount; y++) + { + src.Calculator.SetY(src.RegionY + y); + dst.Calculator.SetY(dst.RegionY + y); + + for (int x = 0; x < xCount; x++) + { + int srcOffset = src.Calculator.GetOffset(src.RegionX + x); + int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x); + + *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset); + } + } + } + } + } + + /// + /// Sets texture pixel data to a constant value, while performing layout conversion if necessary. 
+ /// + /// Pixel type + /// Destination texture memory region + /// Destination texture parameters + /// Constant pixel value to be set + private unsafe void Fill(Span dstSpan, TextureParams dst, T fillValue) where T : unmanaged + { + int xCount = (int)_state.State.LineLengthIn; + int yCount = (int)_state.State.LineCount; + + fixed (byte* dstPtr = dstSpan) + { + byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset. + + for (int y = 0; y < yCount; y++) + { + dst.Calculator.SetY(dst.RegionY + y); + + for (int x = 0; x < xCount; x++) + { + int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x); + + *(T*)(dstBase + dstOffset) = fillValue; + } + } + } + } + + /// + /// Copies data from one texture to another, while performing layout conversion and component shuffling if necessary. + /// + /// Component type + /// Destination texture memory region + /// Source texture memory region + /// Destination texture parameters + /// Source texture parameters + private void CopyShuffle(Span dstSpan, ReadOnlySpan srcSpan, TextureParams dst, TextureParams src) where T : unmanaged + { + int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1; + + for (int i = 0; i < dstComponents; i++) + { + SetRemapComponentsDst componentsDst = i switch + { + 0 => _state.State.SetRemapComponentsDstX, + 1 => _state.State.SetRemapComponentsDstY, + 2 => _state.State.SetRemapComponentsDstZ, + _ => _state.State.SetRemapComponentsDstW + }; + + switch (componentsDst) + { + case SetRemapComponentsDst.SrcX: + Copy(dstSpan.Slice(Unsafe.SizeOf() * i), srcSpan, dst, src); + break; + case SetRemapComponentsDst.SrcY: + Copy(dstSpan.Slice(Unsafe.SizeOf() * i), srcSpan.Slice(Unsafe.SizeOf()), dst, src); + break; + case SetRemapComponentsDst.SrcZ: + Copy(dstSpan.Slice(Unsafe.SizeOf() * i), srcSpan.Slice(Unsafe.SizeOf() * 2), dst, src); + break; + case SetRemapComponentsDst.SrcW: + Copy(dstSpan.Slice(Unsafe.SizeOf() * i), 
srcSpan.Slice(Unsafe.SizeOf() * 3), dst, src); + break; + case SetRemapComponentsDst.ConstA: + Fill(dstSpan.Slice(Unsafe.SizeOf() * i), dst, Unsafe.As(ref _state.State.SetRemapConstA)); + break; + case SetRemapComponentsDst.ConstB: + Fill(dstSpan.Slice(Unsafe.SizeOf() * i), dst, Unsafe.As(ref _state.State.SetRemapConstB)); + break; + } + } + } + /// /// Copies block linear data with block linear GOBs to a block linear destination with linear GOBs. ///