diff --git a/Ryujinx.Ava/AppHost.cs b/Ryujinx.Ava/AppHost.cs index 09163bd60..27dd06e6e 100644 --- a/Ryujinx.Ava/AppHost.cs +++ b/Ryujinx.Ava/AppHost.cs @@ -1,5 +1,6 @@ using ARMeilleure.Translation; using ARMeilleure.Translation.PTC; +using Avalonia; using Avalonia.Input; using Avalonia.Threading; using LibHac.Tools.FsSystem; @@ -13,6 +14,7 @@ using Ryujinx.Ava.Common.Locale; using Ryujinx.Ava.Input; using Ryujinx.Ava.Ui.Controls; using Ryujinx.Ava.Ui.Models; +using Ryujinx.Ava.Ui.Vulkan; using Ryujinx.Ava.Ui.Windows; using Ryujinx.Common; using Ryujinx.Common.Configuration; @@ -22,6 +24,7 @@ using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.GAL.Multithreading; using Ryujinx.Graphics.Gpu; using Ryujinx.Graphics.OpenGL; +using Ryujinx.Graphics.Vulkan; using Ryujinx.HLE.FileSystem; using Ryujinx.HLE.HOS; using Ryujinx.HLE.HOS.Services.Account.Acc; @@ -590,7 +593,23 @@ namespace Ryujinx.Ava { VirtualFileSystem.ReloadKeySet(); - IRenderer renderer = new Renderer(); + IRenderer renderer; + + if (Program.UseVulkan) + { + var vulkan = AvaloniaLocator.Current.GetService(); + renderer = new VulkanGraphicsDevice(vulkan.Instance.InternalHandle, + vulkan.Device.InternalHandle, + vulkan.PhysicalDevice.InternalHandle, + vulkan.Device.Queue.InternalHandle, + vulkan.PhysicalDevice.QueueFamilyIndex, + vulkan.Device.Lock); + } + else + { + renderer = new Renderer(); + } + IHardwareDeviceDriver deviceDriver = new DummyHardwareDeviceDriver(); BackendThreading threadingMode = ConfigurationState.Instance.Graphics.BackendThreading; @@ -800,9 +819,12 @@ namespace Ryujinx.Ava _renderer.ScreenCaptured += Renderer_ScreenCaptured; - (_renderer as Renderer).InitializeBackgroundContext(SPBOpenGLContext.CreateBackgroundContext(Renderer.GameContext)); + if (!Program.UseVulkan) + { + (_renderer as Renderer).InitializeBackgroundContext(SPBOpenGLContext.CreateBackgroundContext((Renderer as OpenGLRendererControl).GameContext)); - Renderer.MakeCurrent(); + Renderer.MakeCurrent(); + } 
Device.Gpu.Renderer.Initialize(_glLogLevel); @@ -861,8 +883,6 @@ namespace Ryujinx.Ava dockedMode += $" ({scale}x)"; } - string vendor = _renderer is Renderer renderer ? renderer.GpuVendor : ""; - StatusUpdatedEvent?.Invoke(this, new StatusUpdatedEventArgs( Device.EnableDeviceVsync, Device.GetVolume(), @@ -870,7 +890,7 @@ namespace Ryujinx.Ava ConfigurationState.Instance.Graphics.AspectRatio.Value.ToText(), LocaleManager.Instance["Game"] + $": {Device.Statistics.GetGameFrameRate():00.00} FPS ({Device.Statistics.GetGameFrameTime():00.00} ms)", $"FIFO: {Device.Statistics.GetFifoPercent():00.00} %", - $"GPU: {vendor}")); + $"GPU: {_renderer.GetHardwareInfo().GpuVendor}")); Renderer.Present(image); } diff --git a/Ryujinx.Ava/Program.cs b/Ryujinx.Ava/Program.cs index c243ba96a..324bbcc76 100644 --- a/Ryujinx.Ava/Program.cs +++ b/Ryujinx.Ava/Program.cs @@ -3,6 +3,7 @@ using Avalonia; using Avalonia.OpenGL; using Avalonia.Rendering; using Avalonia.Threading; +using Ryujinx.Ava.Ui.Backend; using Ryujinx.Ava.Ui.Controls; using Ryujinx.Ava.Ui.Windows; using Ryujinx.Common; @@ -11,9 +12,12 @@ using Ryujinx.Common.GraphicsDriver; using Ryujinx.Common.Logging; using Ryujinx.Common.System; using Ryujinx.Common.SystemInfo; +using Ryujinx.Graphics.Vulkan; using Ryujinx.Modules; using Ryujinx.Ui.Common; using Ryujinx.Ui.Common.Configuration; +using Silk.NET.Vulkan.Extensions.EXT; +using Silk.NET.Vulkan.Extensions.KHR; using System; using System.Collections.Generic; using System.IO; @@ -25,17 +29,20 @@ namespace Ryujinx.Ava internal class Program { public static double WindowScaleFactor { get; set; } + public static double ActualScaleFactor { get; set; } public static string Version { get; private set; } public static string ConfigurationPath { get; private set; } public static string CommandLineProfile { get; set; } public static bool PreviewerDetached { get; private set; } public static RenderTimer RenderTimer { get; private set; } + public static bool UseVulkan { get; private 
set; } [DllImport("user32.dll", SetLastError = true)] public static extern int MessageBoxA(IntPtr hWnd, string text, string caption, uint type); private const uint MB_ICONWARNING = 0x30; + private const int BaseDpi = 96; public static void Main(string[] args) { @@ -66,7 +73,7 @@ namespace Ryujinx.Ava EnableMultiTouch = true, EnableIme = true, UseEGL = false, - UseGpu = true, + UseGpu = !UseVulkan, GlProfiles = new List() { new GlVersion(GlProfileType.OpenGL, 4, 3) @@ -75,7 +82,7 @@ namespace Ryujinx.Ava .With(new Win32PlatformOptions { EnableMultitouch = true, - UseWgl = true, + UseWgl = !UseVulkan, WglProfiles = new List() { new GlVersion(GlProfileType.OpenGL, 4, 3) @@ -84,6 +91,19 @@ namespace Ryujinx.Ava CompositionBackdropCornerRadius = 8f, }) .UseSkia() + .With(new Ui.Vulkan.VulkanOptions() + { + ApplicationName = "Ryujinx.Graphics.Vulkan", + VulkanVersion = new Version(1, 2), + MaxQueueCount = 2, + PreferDiscreteGpu = true, + PreferredDevice = !PreviewerDetached ? "" : ConfigurationState.Instance.Graphics.PreferredGpu.Value, + UseDebug = !PreviewerDetached ? false : ConfigurationState.Instance.Logger.GraphicsDebugLevel.Value != GraphicsDebugLevel.None, + }) + .With(new SkiaOptions() + { + CustomGpuFactory = UseVulkan ? SkiaGpuFactory.CreateVulkanGpu : null + }) .AfterSetup(_ => { AvaloniaLocator.CurrentMutable @@ -136,9 +156,6 @@ namespace Ryujinx.Ava } } - // Make process DPI aware for proper window sizing on high-res screens. - WindowScaleFactor = ForceDpiAware.GetWindowScaleFactor(); - // Delete backup files after updating. Task.Run(Updater.CleanupUpdate); @@ -162,6 +179,18 @@ namespace Ryujinx.Ava ReloadConfig(); + UseVulkan = PreviewerDetached ? ConfigurationState.Instance.Graphics.GraphicsBackend.Value == GraphicsBackend.Vulkan : false; + + if (UseVulkan) + { + // With a custom gpu backend, avalonia doesn't enable dpi awareness, so the backend must handle it. 
This isn't so for the OpenGL backend, + // as that uses avalonia's gpu backend and it's enabled there. + ForceDpiAware.Windows(); + } + + WindowScaleFactor = ForceDpiAware.GetWindowScaleFactor(); + ActualScaleFactor = ForceDpiAware.GetActualScaleFactor() / BaseDpi; + // Logging system information. PrintSystemInfo(); diff --git a/Ryujinx.Ava/Ryujinx.Ava.csproj b/Ryujinx.Ava/Ryujinx.Ava.csproj index 293243bac..102e28ac0 100644 --- a/Ryujinx.Ava/Ryujinx.Ava.csproj +++ b/Ryujinx.Ava/Ryujinx.Ava.csproj @@ -28,10 +28,13 @@ + - + + + @@ -39,6 +42,7 @@ + diff --git a/Ryujinx.Ava/Ui/Applet/AvaloniaDynamicTextInputHandler.cs b/Ryujinx.Ava/Ui/Applet/AvaloniaDynamicTextInputHandler.cs index 294e89654..02a99c1d1 100644 --- a/Ryujinx.Ava/Ui/Applet/AvaloniaDynamicTextInputHandler.cs +++ b/Ryujinx.Ava/Ui/Applet/AvaloniaDynamicTextInputHandler.cs @@ -135,7 +135,7 @@ namespace Ryujinx.Ava.Ui.Applet Dispatcher.UIThread.Post(() => { _hiddenTextBox.Clear(); - _parent.GlRenderer.Focus(); + _parent.RendererControl.Focus(); _parent = null; }); diff --git a/Ryujinx.Ava/Ui/Backend/BackendSurface.cs b/Ryujinx.Ava/Ui/Backend/BackendSurface.cs new file mode 100644 index 000000000..4b54cae2c --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/BackendSurface.cs @@ -0,0 +1,71 @@ +using Avalonia; +using System; +using System.Runtime.InteropServices; +using static Ryujinx.Ava.Ui.Backend.Interop; + +namespace Ryujinx.Ava.Ui.Backend +{ + public abstract class BackendSurface : IDisposable + { + protected IntPtr Display => _display; + + private IntPtr _display = IntPtr.Zero; + + [DllImport("libX11.so.6")] + public static extern IntPtr XOpenDisplay(IntPtr display); + + [DllImport("libX11.so.6")] + public static extern int XCloseDisplay(IntPtr display); + + private PixelSize _currentSize; + public IntPtr Handle { get; protected set; } + + public bool IsDisposed { get; private set; } + + public BackendSurface(IntPtr handle) + { + Handle = handle; + + if (OperatingSystem.IsLinux()) + { + _display = 
XOpenDisplay(IntPtr.Zero); + } + } + + public PixelSize Size + { + get + { + PixelSize size = new PixelSize(); + if (OperatingSystem.IsWindows()) + { + GetClientRect(Handle, out var rect); + size = new PixelSize(rect.right, rect.bottom); + } + else if (OperatingSystem.IsLinux()) + { + XWindowAttributes attributes = new XWindowAttributes(); + XGetWindowAttributes(Display, Handle, ref attributes); + + size = new PixelSize(attributes.width, attributes.height); + } + + _currentSize = size; + + return size; + } + } + + public PixelSize CurrentSize => _currentSize; + + public virtual void Dispose() + { + IsDisposed = true; + + if (_display != IntPtr.Zero) + { + XCloseDisplay(_display); + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Ava/Ui/Backend/Interop.cs b/Ryujinx.Ava/Ui/Backend/Interop.cs new file mode 100644 index 000000000..617e97678 --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Interop.cs @@ -0,0 +1,49 @@ +using FluentAvalonia.Interop; +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Ava.Ui.Backend +{ + public static class Interop + { + [StructLayout(LayoutKind.Sequential)] + public struct XWindowAttributes + { + public int x; + public int y; + public int width; + public int height; + public int border_width; + public int depth; + public IntPtr visual; + public IntPtr root; + public int c_class; + public int bit_gravity; + public int win_gravity; + public int backing_store; + public IntPtr backing_planes; + public IntPtr backing_pixel; + public int save_under; + public IntPtr colormap; + public int map_installed; + public int map_state; + public IntPtr all_event_masks; + public IntPtr your_event_mask; + public IntPtr do_not_propagate_mask; + public int override_direct; + public IntPtr screen; + } + + [DllImport("user32.dll")] + public static extern bool GetClientRect(IntPtr hwnd, out RECT lpRect); + + [DllImport("libX11.so.6")] + public static extern int XCloseDisplay(IntPtr display); + + [DllImport("libX11.so.6")] + public 
static extern int XGetWindowAttributes(IntPtr display, IntPtr window, ref XWindowAttributes attributes); + + [DllImport("libX11.so.6")] + public static extern IntPtr XOpenDisplay(IntPtr display); + } +} diff --git a/Ryujinx.Ava/Ui/Backend/SkiaGpuFactory.cs b/Ryujinx.Ava/Ui/Backend/SkiaGpuFactory.cs new file mode 100644 index 000000000..db38c3835 --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/SkiaGpuFactory.cs @@ -0,0 +1,23 @@ +using Avalonia; +using Avalonia.Skia; +using Ryujinx.Ava.Ui.Vulkan; +using Ryujinx.Ava.Ui.Backend.Vulkan; + +namespace Ryujinx.Ava.Ui.Backend +{ + public static class SkiaGpuFactory + { + public static ISkiaGpu CreateVulkanGpu() + { + var skiaOptions = AvaloniaLocator.Current.GetService() ?? new SkiaOptions(); + var platformInterface = AvaloniaLocator.Current.GetService(); + if (platformInterface == null) + { + VulkanPlatformInterface.TryInitialize(); + } + var gpu = new VulkanSkiaGpu(skiaOptions.MaxGpuResourceSizeBytes); + AvaloniaLocator.CurrentMutable.Bind().ToConstant(gpu); + return gpu; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/ResultExtensions.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/ResultExtensions.cs new file mode 100644 index 000000000..1fd88321d --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/ResultExtensions.cs @@ -0,0 +1,13 @@ +using System; +using Silk.NET.Vulkan; + +namespace Ryujinx.Ava.Ui.Vulkan +{ + public static class ResultExtensions + { + public static void ThrowOnError(this Result result) + { + if (result != Result.Success) throw new Exception($"Unexpected API error \"{result}\"."); + } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/Skia/VulkanRenderTarget.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/Skia/VulkanRenderTarget.cs new file mode 100644 index 000000000..88aeea92b --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/Skia/VulkanRenderTarget.cs @@ -0,0 +1,134 @@ +using System; +using Avalonia.Skia; +using Ryujinx.Ava.Ui.Vulkan; +using Ryujinx.Ava.Ui.Vulkan.Surfaces; +using SkiaSharp; + 
+namespace Ryujinx.Ava.Ui.Backend.Vulkan +{ + internal class VulkanRenderTarget : ISkiaGpuRenderTarget + { + public GRContext GrContext { get; set; } + + private readonly VulkanSurfaceRenderTarget _surface; + private readonly IVulkanPlatformSurface _vulkanPlatformSurface; + + public VulkanRenderTarget(VulkanPlatformInterface vulkanPlatformInterface, + IVulkanPlatformSurface vulkanPlatformSurface) + { + _surface = vulkanPlatformInterface.CreateRenderTarget(vulkanPlatformSurface); + _vulkanPlatformSurface = vulkanPlatformSurface; + } + + public void Dispose() + { + _surface.Dispose(); + } + + public ISkiaGpuRenderSession BeginRenderingSession() + { + var session = _surface.BeginDraw(_vulkanPlatformSurface.Scaling); + bool success = false; + try + { + var disp = session.Display; + var api = session.Api; + + var size = session.Size; + var scaling = session.Scaling; + if (size.Width <= 0 || size.Height <= 0 || scaling < 0) + { + size = new Avalonia.PixelSize(1, 1); + scaling = 1; + } + + lock (GrContext) + { + GrContext.ResetContext(); + + var imageInfo = new GRVkImageInfo() + { + CurrentQueueFamily = disp.QueueFamilyIndex, + Format = _surface.ImageFormat, + Image = _surface.Image.Handle, + ImageLayout = (uint)_surface.Image.CurrentLayout, + ImageTiling = (uint)_surface.Image.Tiling, + ImageUsageFlags = _surface.UsageFlags, + LevelCount = _surface.MipLevels, + SampleCount = 1, + Protected = false, + Alloc = new GRVkAlloc() + { + Memory = _surface.Image.MemoryHandle, + Flags = 0, + Offset = 0, + Size = _surface.MemorySize + } + }; + + var renderTarget = + new GRBackendRenderTarget((int)size.Width, (int)size.Height, 1, + imageInfo); + var surface = SKSurface.Create(GrContext, renderTarget, + session.IsYFlipped ? GRSurfaceOrigin.TopLeft : GRSurfaceOrigin.BottomLeft, + _surface.IsRgba ? 
SKColorType.Rgba8888 : SKColorType.Bgra8888, SKColorSpace.CreateSrgb()); + + if (surface == null) + { + throw new InvalidOperationException( + "Surface can't be created with the provided render target"); + } + + success = true; + + return new VulkanGpuSession(GrContext, renderTarget, surface, session); + } + } + finally + { + if (!success) + session.Dispose(); + } + } + + public bool IsCorrupted { get; } + + internal class VulkanGpuSession : ISkiaGpuRenderSession + { + private readonly GRBackendRenderTarget _backendRenderTarget; + private readonly VulkanSurfaceRenderingSession _vulkanSession; + + public VulkanGpuSession(GRContext grContext, + GRBackendRenderTarget backendRenderTarget, + SKSurface surface, + VulkanSurfaceRenderingSession vulkanSession) + { + GrContext = grContext; + _backendRenderTarget = backendRenderTarget; + SkSurface = surface; + _vulkanSession = vulkanSession; + + SurfaceOrigin = vulkanSession.IsYFlipped ? GRSurfaceOrigin.TopLeft : GRSurfaceOrigin.BottomLeft; + } + + public void Dispose() + { + lock (_vulkanSession.Display.Lock) + { + SkSurface.Canvas.Flush(); + + SkSurface.Dispose(); + _backendRenderTarget.Dispose(); + GrContext.Flush(); + + _vulkanSession.Dispose(); + } + } + + public GRContext GrContext { get; } + public SKSurface SkSurface { get; } + public double ScaleFactor => _vulkanSession.Scaling; + public GRSurfaceOrigin SurfaceOrigin { get; } + } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/Skia/VulkanSkiaGpu.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/Skia/VulkanSkiaGpu.cs new file mode 100644 index 000000000..325ddd6cf --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/Skia/VulkanSkiaGpu.cs @@ -0,0 +1,118 @@ +using System; +using System.Collections.Generic; +using Avalonia; +using Avalonia.Platform; +using Avalonia.Skia; +using Avalonia.X11; +using Ryujinx.Ava.Ui.Vulkan; +using Silk.NET.Vulkan; +using SkiaSharp; + +namespace Ryujinx.Ava.Ui.Backend.Vulkan +{ + public class VulkanSkiaGpu : ISkiaGpu + { + private readonly 
VulkanPlatformInterface _vulkan; + private readonly long? _maxResourceBytes; + private GRContext _grContext; + private GRVkBackendContext _grVkBackend; + private bool _initialized; + + public GRContext GrContext { get => _grContext; set => _grContext = value; } + + public VulkanSkiaGpu(long? maxResourceBytes) + { + _vulkan = AvaloniaLocator.Current.GetService(); + _maxResourceBytes = maxResourceBytes; + } + /* Creates the Skia Vulkan context on first use; later calls return immediately. */ + private void Initialize() + { + if (_initialized) + { + return; + } + + _initialized = true; + GRVkGetProcedureAddressDelegate getProc = (string name, IntPtr instanceHandle, IntPtr deviceHandle) => + { + IntPtr addr = IntPtr.Zero; + /* When a device handle is supplied, try device-level lookup first: the supplied handle, then our own device. */ + if (deviceHandle != IntPtr.Zero) + { + addr = _vulkan.Device.Api.GetDeviceProcAddr(new Device(deviceHandle), name); + if (addr != IntPtr.Zero) + return addr; + + addr = _vulkan.Device.Api.GetDeviceProcAddr(new Device(_vulkan.Device.Handle), name); + + if (addr != IntPtr.Zero) + return addr; + } + /* Fall back to instance-level lookup: our own instance first, then the handle passed in. */ + addr = _vulkan.Device.Api.GetInstanceProcAddr(new Instance(_vulkan.Instance.Handle), name); + + if (addr == IntPtr.Zero) + addr = _vulkan.Device.Api.GetInstanceProcAddr(new Instance(instanceHandle), name); + + return addr; + }; + + _grVkBackend = new GRVkBackendContext() + { + VkInstance = _vulkan.Instance.Handle, /* must be the instance handle; the device handle belongs in VkDevice below */ + VkPhysicalDevice = _vulkan.PhysicalDevice.Handle, + VkDevice = _vulkan.Device.Handle, + VkQueue = _vulkan.Device.Queue.Handle, + GraphicsQueueIndex = _vulkan.PhysicalDevice.QueueFamilyIndex, + GetProcedureAddress = getProc + }; + _grContext = GRContext.CreateVulkan(_grVkBackend); + if (_maxResourceBytes.HasValue) + { + _grContext.SetResourceCacheLimit(_maxResourceBytes.Value); + } + } + + public ISkiaGpuRenderTarget TryCreateRenderTarget(IEnumerable surfaces) + { + foreach (var surface in surfaces) + { + VulkanWindowSurface window; + + if (surface is IPlatformHandle handle) + { + window = new VulkanWindowSurface(handle.Handle); + } + else if (surface is X11FramebufferSurface x11FramebufferSurface) + { + // As of Avalonia 
0.10.13, an IPlatformHandle isn't passed for linux, so use reflection to otherwise get the window id + var xId = (IntPtr)x11FramebufferSurface.GetType().GetField( + "_xid", + System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance).GetValue(x11FramebufferSurface); + + window = new VulkanWindowSurface(xId); + } + else + { + continue; + } + + var vulkanRenderTarget = new VulkanRenderTarget(_vulkan, window); + + Initialize(); + + vulkanRenderTarget.GrContext = _grContext; + + return vulkanRenderTarget; + } + + return null; + } + + public ISkiaSurface TryCreateSurface(PixelSize size, ISkiaGpuRenderSession session) + { + return null; + } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/Skia/VulkanSurface.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/Skia/VulkanSurface.cs new file mode 100644 index 000000000..37a1577c0 --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/Skia/VulkanSurface.cs @@ -0,0 +1,58 @@ +using Avalonia; +using Ryujinx.Ava.Ui.Vulkan; +using Ryujinx.Ava.Ui.Vulkan.Surfaces; +using Silk.NET.Vulkan; +using Silk.NET.Vulkan.Extensions.KHR; +using System; + +namespace Ryujinx.Ava.Ui.Backend.Vulkan +{ + internal class VulkanWindowSurface : BackendSurface, IVulkanPlatformSurface + { + public float Scaling => (float)Program.ActualScaleFactor; + + public PixelSize SurfaceSize => Size; + + public VulkanWindowSurface(IntPtr handle) : base(handle) + { + } + + public unsafe SurfaceKHR CreateSurface(VulkanInstance instance) + { + if (OperatingSystem.IsWindows()) + { + if (instance.Api.TryGetInstanceExtension(new Instance(instance.Handle), out KhrWin32Surface surfaceExtension)) + { + var createInfo = new Win32SurfaceCreateInfoKHR() { Hinstance = 0, Hwnd = Handle, SType = StructureType.Win32SurfaceCreateInfoKhr }; + + surfaceExtension.CreateWin32Surface(new Instance(instance.Handle), createInfo, null, out var surface).ThrowOnError(); + + return surface; + } + } + else if (OperatingSystem.IsLinux()) + { + if (instance.Api.TryGetInstanceExtension(new 
Instance(instance.Handle), out KhrXlibSurface surfaceExtension)) + { + var createInfo = new XlibSurfaceCreateInfoKHR() + { + SType = StructureType.XlibSurfaceCreateInfoKhr, + Dpy = (nint*)Display, + Window = Handle + }; + + surfaceExtension.CreateXlibSurface(new Instance(instance.Handle), createInfo, null, out var surface).ThrowOnError(); + + return surface; + } + } + + throw new PlatformNotSupportedException("The current platform does not support surface creation."); + } + + public override void Dispose() + { + base.Dispose(); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/Surfaces/IVulkanPlatformSurface.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/Surfaces/IVulkanPlatformSurface.cs new file mode 100644 index 000000000..642d8a6a3 --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/Surfaces/IVulkanPlatformSurface.cs @@ -0,0 +1,13 @@ +using System; +using Avalonia; +using Silk.NET.Vulkan; + +namespace Ryujinx.Ava.Ui.Vulkan.Surfaces +{ + public interface IVulkanPlatformSurface : IDisposable + { + float Scaling { get; } + PixelSize SurfaceSize { get; } + SurfaceKHR CreateSurface(VulkanInstance instance); + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/Surfaces/VulkanSurfaceRenderTarget.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/Surfaces/VulkanSurfaceRenderTarget.cs new file mode 100644 index 000000000..f7d9684ce --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/Surfaces/VulkanSurfaceRenderTarget.cs @@ -0,0 +1,92 @@ +using System; +using Avalonia; +using Silk.NET.Vulkan; + +namespace Ryujinx.Ava.Ui.Vulkan.Surfaces +{ + internal class VulkanSurfaceRenderTarget : IDisposable + { + private readonly VulkanPlatformInterface _platformInterface; + + public bool IsCorrupted { get; set; } = true; + private readonly Format _format; + + public VulkanImage Image { get; private set; } + + public uint MipLevels => Image.MipLevels; + + public VulkanSurfaceRenderTarget(VulkanPlatformInterface platformInterface, VulkanSurface surface) + { + _platformInterface = 
platformInterface; + + Display = VulkanDisplay.CreateDisplay(platformInterface.Instance, platformInterface.Device, + platformInterface.PhysicalDevice, surface); + Surface = surface; + + // Skia seems to only create surfaces from images with unorm format + + IsRgba = Display.SurfaceFormat.Format >= Format.R8G8B8A8Unorm && + Display.SurfaceFormat.Format <= Format.R8G8B8A8Srgb; + + _format = IsRgba ? Format.R8G8B8A8Unorm : Format.B8G8R8A8Unorm; + } + + public bool IsRgba { get; } + + public uint ImageFormat => (uint) _format; + + public ulong MemorySize => Image.MemorySize; + + public VulkanDisplay Display { get; } + + public VulkanSurface Surface { get; } + + public uint UsageFlags => Image.UsageFlags; + + public PixelSize Size { get; private set; } + + public void Dispose() + { + _platformInterface.Device.WaitIdle(); + DestroyImage(); + Display?.Dispose(); + Surface?.Dispose(); + } + + public VulkanSurfaceRenderingSession BeginDraw(float scaling) + { + var session = new VulkanSurfaceRenderingSession(Display, _platformInterface.Device, this, scaling); + + if (IsCorrupted) + { + IsCorrupted = false; + DestroyImage(); + CreateImage(); + } + else + { + Image.TransitionLayout(ImageLayout.ColorAttachmentOptimal, AccessFlags.AccessNoneKhr); + } + + return session; + } + + public void Invalidate() + { + IsCorrupted = true; + } + + private void CreateImage() + { + Size = Display.Size; + + Image = new VulkanImage(_platformInterface.Device, _platformInterface.PhysicalDevice, _platformInterface.Device.CommandBufferPool, ImageFormat, Size); + } + + private void DestroyImage() + { + _platformInterface.Device.WaitIdle(); + Image?.Dispose(); + } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanCommandBufferPool.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanCommandBufferPool.cs new file mode 100644 index 000000000..92c57905a --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanCommandBufferPool.cs @@ -0,0 +1,177 @@ +using System; +using System.Collections.Generic; +using 
Silk.NET.Vulkan; + +namespace Ryujinx.Ava.Ui.Vulkan +{ + internal class VulkanCommandBufferPool : IDisposable + { + private readonly VulkanDevice _device; + private readonly CommandPool _commandPool; + + private readonly List _usedCommandBuffers = new(); + + public unsafe VulkanCommandBufferPool(VulkanDevice device, VulkanPhysicalDevice physicalDevice) + { + _device = device; + + var commandPoolCreateInfo = new CommandPoolCreateInfo + { + SType = StructureType.CommandPoolCreateInfo, + Flags = CommandPoolCreateFlags.CommandPoolCreateResetCommandBufferBit, + QueueFamilyIndex = physicalDevice.QueueFamilyIndex + }; + + device.Api.CreateCommandPool(_device.InternalHandle, commandPoolCreateInfo, null, out _commandPool) + .ThrowOnError(); + } + + public unsafe void Dispose() + { + FreeUsedCommandBuffers(); + _device.Api.DestroyCommandPool(_device.InternalHandle, _commandPool, null); + } + /* Allocates one primary command buffer from this pool; throws if the allocation call does not return Success. */ + private CommandBuffer AllocateCommandBuffer() + { + var commandBufferAllocateInfo = new CommandBufferAllocateInfo + { + SType = StructureType.CommandBufferAllocateInfo, + CommandPool = _commandPool, + CommandBufferCount = 1, + Level = CommandBufferLevel.Primary + }; + + _device.Api.AllocateCommandBuffers(_device.InternalHandle, commandBufferAllocateInfo, out var commandBuffer).ThrowOnError(); /* same error handling as CreateCommandPool above */ + + return commandBuffer; + } + + public VulkanCommandBuffer CreateCommandBuffer() + { + return new(_device, this); + } + + public void FreeUsedCommandBuffers() + { + lock (_usedCommandBuffers) + { + foreach (var usedCommandBuffer in _usedCommandBuffers) usedCommandBuffer.Dispose(); + + _usedCommandBuffers.Clear(); + } + } + + private void DisposeCommandBuffer(VulkanCommandBuffer commandBuffer) + { + lock (_usedCommandBuffers) + { + _usedCommandBuffers.Add(commandBuffer); + } + } + + public class VulkanCommandBuffer : IDisposable + { + private readonly VulkanCommandBufferPool _commandBufferPool; + private readonly VulkanDevice _device; + private readonly Fence _fence; + private bool _hasEnded; + private bool 
_hasStarted; + + public IntPtr Handle => InternalHandle.Handle; + + internal CommandBuffer InternalHandle { get; } + + internal unsafe VulkanCommandBuffer(VulkanDevice device, VulkanCommandBufferPool commandBufferPool) + { + _device = device; + _commandBufferPool = commandBufferPool; + + InternalHandle = _commandBufferPool.AllocateCommandBuffer(); + /* The fence is created in the signaled state; Submit resets it before handing it to the queue. */ + var fenceCreateInfo = new FenceCreateInfo() + { + SType = StructureType.FenceCreateInfo, + Flags = FenceCreateFlags.FenceCreateSignaledBit + }; + + device.Api.CreateFence(device.InternalHandle, fenceCreateInfo, null, out _fence).ThrowOnError(); /* fail here rather than leave an invalid fence for Dispose to wait on */ + } + + public unsafe void Dispose() + { + _device.Api.WaitForFences(_device.InternalHandle, 1, _fence, true, ulong.MaxValue); + _device.Api.FreeCommandBuffers(_device.InternalHandle, _commandBufferPool._commandPool, 1, InternalHandle); + _device.Api.DestroyFence(_device.InternalHandle, _fence, null); + } + /* Begins one-time-submit recording; only the first call has any effect. */ + public void BeginRecording() + { + if (!_hasStarted) + { + _hasStarted = true; + + var beginInfo = new CommandBufferBeginInfo + { + SType = StructureType.CommandBufferBeginInfo, + Flags = CommandBufferUsageFlags.CommandBufferUsageOneTimeSubmitBit + }; + + _device.Api.BeginCommandBuffer(InternalHandle, beginInfo).ThrowOnError(); + } + } + /* Ends recording once; a no-op if recording never started or has already ended. */ + public void EndRecording() + { + if (_hasStarted && !_hasEnded) + { + _hasEnded = true; + + _device.Api.EndCommandBuffer(InternalHandle).ThrowOnError(); + } + } + + public void Submit() + { + Submit(null, null, null, _fence); + } + + public unsafe void Submit( + ReadOnlySpan waitSemaphores, + ReadOnlySpan waitDstStageMask, + ReadOnlySpan signalSemaphores, + Fence? fence = null) + { + EndRecording(); + + if (!fence.HasValue) + fence = _fence; + + fixed (Semaphore* pWaitSemaphores = waitSemaphores, pSignalSemaphores = signalSemaphores) + { + fixed (PipelineStageFlags* pWaitDstStageMask = waitDstStageMask) + { + var commandBuffer = InternalHandle; + var submitInfo = new SubmitInfo + { + SType = StructureType.SubmitInfo, + WaitSemaphoreCount = waitSemaphores != null ? 
(uint)waitSemaphores.Length : 0, + PWaitSemaphores = pWaitSemaphores, + PWaitDstStageMask = pWaitDstStageMask, + CommandBufferCount = 1, + PCommandBuffers = &commandBuffer, + SignalSemaphoreCount = signalSemaphores != null ? (uint)signalSemaphores.Length : 0, + PSignalSemaphores = pSignalSemaphores, + }; + + _device.Api.ResetFences(_device.InternalHandle, 1, fence.Value); + + _device.Submit(submitInfo, fence.Value); + } + } + + _commandBufferPool.DisposeCommandBuffer(this); + } + } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanDevice.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanDevice.cs new file mode 100644 index 000000000..7eb42cdf0 --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanDevice.cs @@ -0,0 +1,67 @@ +using System; +using Silk.NET.Vulkan; + +namespace Ryujinx.Ava.Ui.Vulkan +{ + internal class VulkanDevice : IDisposable + { + private static readonly object _lock = new object(); /* static: one lock shared by every VulkanDevice instance */ + + public VulkanDevice(Device apiHandle, VulkanPhysicalDevice physicalDevice, Vk api) + { + InternalHandle = apiHandle; + Api = api; + + api.GetDeviceQueue(apiHandle, physicalDevice.QueueFamilyIndex, 0, out var queue); + + var vulkanQueue = new VulkanQueue(this, queue); + Queue = vulkanQueue; + + PresentQueue = vulkanQueue; + + CommandBufferPool = new VulkanCommandBufferPool(this, physicalDevice); + } + + public IntPtr Handle => InternalHandle.Handle; + + internal Device InternalHandle { get; } + public Vk Api { get; } + + public VulkanQueue Queue { get; private set; } + public VulkanQueue PresentQueue { get; } + public VulkanCommandBufferPool CommandBufferPool { get; } + + public void Dispose() + { + WaitIdle(); + CommandBufferPool?.Dispose(); + Queue = null; + } + /* Submits one batch to Queue while holding the shared lock; throws if the submit call does not return Success. */ + internal void Submit(SubmitInfo submitInfo, Fence fence = new()) + { + lock (_lock) + { + Api.QueueSubmit(Queue.InternalHandle, 1, submitInfo, fence).ThrowOnError(); /* a silently failed submit would leave the fence unsignaled */ + } + } + + public void WaitIdle() + { + lock (_lock) + { + Api.DeviceWaitIdle(InternalHandle); + } + } + + public void QueueWaitIdle() + { + lock (_lock) + { + 
Api.QueueWaitIdle(Queue.InternalHandle); + } + } + + public object Lock => _lock; + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanDisplay.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanDisplay.cs new file mode 100644 index 000000000..2fbe7da8d --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanDisplay.cs @@ -0,0 +1,406 @@ +using System; +using System.Linq; +using System.Threading; +using Avalonia; +using Ryujinx.Ava.Ui.Vulkan.Surfaces; +using Silk.NET.Vulkan; +using Silk.NET.Vulkan.Extensions.KHR; + +namespace Ryujinx.Ava.Ui.Vulkan +{ + internal class VulkanDisplay : IDisposable + { + private static KhrSwapchain _swapchainExtension; + private readonly VulkanInstance _instance; + private readonly VulkanPhysicalDevice _physicalDevice; + private readonly VulkanSemaphorePair _semaphorePair; + private uint _nextImage; + private readonly VulkanSurface _surface; + private SurfaceFormatKHR _surfaceFormat; + private SwapchainKHR _swapchain; + private Extent2D _swapchainExtent; + private Image[] _swapchainImages; + private VulkanDevice _device { get; } + private ImageView[] _swapchainImageViews = new ImageView[0]; + + public VulkanCommandBufferPool CommandBufferPool { get; set; } + + public object Lock => _device.Lock; + + private VulkanDisplay(VulkanInstance instance, VulkanDevice device, + VulkanPhysicalDevice physicalDevice, VulkanSurface surface, SwapchainKHR swapchain, + Extent2D swapchainExtent) + { + _instance = instance; + _device = device; + _physicalDevice = physicalDevice; + _swapchain = swapchain; + _swapchainExtent = swapchainExtent; + _surface = surface; + + CreateSwapchainImages(); + + _semaphorePair = new VulkanSemaphorePair(_device); + + CommandBufferPool = new VulkanCommandBufferPool(device, physicalDevice); + } + + public PixelSize Size { get; private set; } + public uint QueueFamilyIndex => _physicalDevice.QueueFamilyIndex; + + internal SurfaceFormatKHR SurfaceFormat + { + get + { + if (_surfaceFormat.Format == Format.Undefined) + _surfaceFormat = 
_surface.GetSurfaceFormat(_physicalDevice); + + return _surfaceFormat; + } + } + + public unsafe void Dispose() + { + _device.WaitIdle(); + _semaphorePair?.Dispose(); + DestroyCurrentImageViews(); + _swapchainExtension.DestroySwapchain(_device.InternalHandle, _swapchain, null); + CommandBufferPool.Dispose(); + } + + private static unsafe SwapchainKHR CreateSwapchain(VulkanInstance instance, VulkanDevice device, + VulkanPhysicalDevice physicalDevice, VulkanSurface surface, out Extent2D swapchainExtent, + VulkanDisplay oldDisplay = null) + { + if (_swapchainExtension == null) + { + instance.Api.TryGetDeviceExtension(instance.InternalHandle, device.InternalHandle, out KhrSwapchain extension); + + _swapchainExtension = extension; + } + + while (!surface.CanSurfacePresent(physicalDevice)) + { + Thread.Sleep(16); + } + + VulkanSurface.SurfaceExtension.GetPhysicalDeviceSurfaceCapabilities(physicalDevice.InternalHandle, + surface.ApiHandle, out var capabilities); + + uint presentModesCount; + + VulkanSurface.SurfaceExtension.GetPhysicalDeviceSurfacePresentModes(physicalDevice.InternalHandle, + surface.ApiHandle, + &presentModesCount, null); + + var presentModes = new PresentModeKHR[presentModesCount]; + + fixed (PresentModeKHR* pPresentModes = presentModes) + { + VulkanSurface.SurfaceExtension.GetPhysicalDeviceSurfacePresentModes(physicalDevice.InternalHandle, + surface.ApiHandle, &presentModesCount, pPresentModes); + } + + var imageCount = capabilities.MinImageCount + 1; + if (capabilities.MaxImageCount > 0 && imageCount > capabilities.MaxImageCount) + imageCount = capabilities.MaxImageCount; + + var surfaceFormat = surface.GetSurfaceFormat(physicalDevice); + + bool supportsIdentityTransform = capabilities.SupportedTransforms.HasFlag(SurfaceTransformFlagsKHR.SurfaceTransformIdentityBitKhr); + bool isRotated = capabilities.CurrentTransform.HasFlag(SurfaceTransformFlagsKHR.SurfaceTransformRotate90BitKhr) || + 
capabilities.CurrentTransform.HasFlag(SurfaceTransformFlagsKHR.SurfaceTransformRotate270BitKhr); + + if (capabilities.CurrentExtent.Width != uint.MaxValue) + { + swapchainExtent = capabilities.CurrentExtent; + } + else + { + var surfaceSize = surface.SurfaceSize; + + var width = Math.Max(capabilities.MinImageExtent.Width, + Math.Min(capabilities.MaxImageExtent.Width, (uint)surfaceSize.Width)); + var height = Math.Max(capabilities.MinImageExtent.Height, + Math.Min(capabilities.MaxImageExtent.Height, (uint)surfaceSize.Height)); + + swapchainExtent = new Extent2D(width, height); + } + + PresentModeKHR presentMode; + var modes = presentModes.ToList(); + + if (modes.Contains(PresentModeKHR.PresentModeImmediateKhr)) + presentMode = PresentModeKHR.PresentModeImmediateKhr; + else if (modes.Contains(PresentModeKHR.PresentModeMailboxKhr)) + presentMode = PresentModeKHR.PresentModeMailboxKhr; + else + presentMode = PresentModeKHR.PresentModeFifoKhr; + + var compositeAlphaFlags = CompositeAlphaFlagsKHR.CompositeAlphaOpaqueBitKhr; + + if (capabilities.SupportedCompositeAlpha.HasFlag(CompositeAlphaFlagsKHR.CompositeAlphaPostMultipliedBitKhr)) + { + compositeAlphaFlags = CompositeAlphaFlagsKHR.CompositeAlphaPostMultipliedBitKhr; + } + else if (capabilities.SupportedCompositeAlpha.HasFlag(CompositeAlphaFlagsKHR.CompositeAlphaPreMultipliedBitKhr)) + { + compositeAlphaFlags = CompositeAlphaFlagsKHR.CompositeAlphaPreMultipliedBitKhr; + } + + var swapchainCreateInfo = new SwapchainCreateInfoKHR + { + SType = StructureType.SwapchainCreateInfoKhr, + Surface = surface.ApiHandle, + MinImageCount = imageCount, + ImageFormat = surfaceFormat.Format, + ImageColorSpace = surfaceFormat.ColorSpace, + ImageExtent = swapchainExtent, + ImageUsage = + ImageUsageFlags.ImageUsageColorAttachmentBit | ImageUsageFlags.ImageUsageTransferDstBit, + ImageSharingMode = SharingMode.Exclusive, + ImageArrayLayers = 1, + PreTransform = supportsIdentityTransform && isRotated ? 
SurfaceTransformFlagsKHR.SurfaceTransformIdentityBitKhr : capabilities.CurrentTransform, + CompositeAlpha = compositeAlphaFlags, + PresentMode = presentMode, + Clipped = true, + OldSwapchain = oldDisplay?._swapchain ?? new SwapchainKHR() + }; + + _swapchainExtension.CreateSwapchain(device.InternalHandle, swapchainCreateInfo, null, out var swapchain) + .ThrowOnError(); + + if (oldDisplay != null) + { + _swapchainExtension.DestroySwapchain(device.InternalHandle, oldDisplay._swapchain, null); + } + + return swapchain; + } + + + internal static VulkanDisplay CreateDisplay(VulkanInstance instance, VulkanDevice device, + VulkanPhysicalDevice physicalDevice, VulkanSurface surface) + { + var swapchain = CreateSwapchain(instance, device, physicalDevice, surface, out var extent); + + return new VulkanDisplay(instance, device, physicalDevice, surface, swapchain, extent); + } + + private unsafe void CreateSwapchainImages() + { + DestroyCurrentImageViews(); + + Size = new PixelSize((int)_swapchainExtent.Width, (int)_swapchainExtent.Height); + + uint imageCount = 0; + + _swapchainExtension.GetSwapchainImages(_device.InternalHandle, _swapchain, &imageCount, null); + + _swapchainImages = new Image[imageCount]; + + fixed (Image* pSwapchainImages = _swapchainImages) + { + _swapchainExtension.GetSwapchainImages(_device.InternalHandle, _swapchain, &imageCount, pSwapchainImages); + } + + _swapchainImageViews = new ImageView[imageCount]; + + var surfaceFormat = SurfaceFormat; + + for (var i = 0; i < imageCount; i++) + { + _swapchainImageViews[i] = CreateSwapchainImageView(_swapchainImages[i], surfaceFormat.Format); + } + } + + private unsafe void DestroyCurrentImageViews() + { + if (_swapchainImageViews.Length > 0) + { + for (var i = 0; i < _swapchainImageViews.Length; i++) + { + _instance.Api.DestroyImageView(_device.InternalHandle, _swapchainImageViews[i], null); + } + } + } + + private void Recreate() + { + _device.WaitIdle(); + _swapchain = CreateSwapchain(_instance, _device, 
_physicalDevice, _surface, out var extent, this); + + _swapchainExtent = extent; + + CreateSwapchainImages(); + } + + /* Creates a 2D color view with identity swizzle over a single mip level and array layer of the given swapchain image. */ + private unsafe ImageView CreateSwapchainImageView(Image swapchainImage, Format format) + { + var componentMapping = new ComponentMapping( + ComponentSwizzle.Identity, + ComponentSwizzle.Identity, + ComponentSwizzle.Identity, + ComponentSwizzle.Identity); + + var aspectFlags = ImageAspectFlags.ImageAspectColorBit; + + var subresourceRange = new ImageSubresourceRange(aspectFlags, 0, 1, 0, 1); + + var imageCreateInfo = new ImageViewCreateInfo + { + SType = StructureType.ImageViewCreateInfo, + Image = swapchainImage, + ViewType = ImageViewType.ImageViewType2D, + Format = format, + Components = componentMapping, + SubresourceRange = subresourceRange + }; + + _instance.Api.CreateImageView(_device.InternalHandle, imageCreateInfo, null, out var imageView).ThrowOnError(); + return imageView; + } + + /* Recreates the swapchain when the surface size no longer matches; returns false when a recreation happened. */ + public bool EnsureSwapchainAvailable() + { + if (Size != _surface.SurfaceSize) + { + Recreate(); + + return false; + } + + return true; + } + + /* Acquires the next swapchain image, rebuilding the swapchain while it is out of date, and returns a command buffer that is already recording with that image transitioned to TransferDstOptimal. */ + internal VulkanCommandBufferPool.VulkanCommandBuffer StartPresentation(VulkanSurfaceRenderTarget renderTarget) + { + _nextImage = 0; + while (true) + { + var acquireResult = _swapchainExtension.AcquireNextImage( + _device.InternalHandle, + _swapchain, + ulong.MaxValue, + _semaphorePair.ImageAvailableSemaphore, + new Fence(), + ref _nextImage); + + if (acquireResult == Result.ErrorOutOfDateKhr) + { + /* No image was acquired and the semaphore was not signalled, so it is safe to rebuild and acquire again. */ + Recreate(); + } + else if (acquireResult == Result.SuboptimalKhr) + { + /* Suboptimal is a success code: the image is acquired and the semaphore signal is pending. Acquiring again with the same semaphore would be invalid, so present this image; a size change is picked up by EnsureSwapchainAvailable. */ + break; + } + else + { + acquireResult.ThrowOnError(); + break; + } + } + + var commandBuffer = CommandBufferPool.CreateCommandBuffer(); + commandBuffer.BeginRecording(); + + VulkanMemoryHelper.TransitionLayout(_device, commandBuffer.InternalHandle, + _swapchainImages[_nextImage], ImageLayout.Undefined, + AccessFlags.AccessNoneKhr, + ImageLayout.TransferDstOptimal, + AccessFlags.AccessTransferWriteBit, + 1); + + return commandBuffer; + } + + internal void 
BlitImageToCurrentImage(VulkanSurfaceRenderTarget renderTarget, CommandBuffer commandBuffer) + { + VulkanMemoryHelper.TransitionLayout(_device, commandBuffer, + renderTarget.Image.InternalHandle.Value, (ImageLayout)renderTarget.Image.CurrentLayout, + AccessFlags.AccessNoneKhr, + ImageLayout.TransferSrcOptimal, + AccessFlags.AccessTransferReadBit, + renderTarget.MipLevels); + + var srcBlitRegion = new ImageBlit + { + SrcOffsets = new ImageBlit.SrcOffsetsBuffer + { + Element0 = new Offset3D(0, 0, 0), + Element1 = new Offset3D(renderTarget.Size.Width, renderTarget.Size.Height, 1), + }, + DstOffsets = new ImageBlit.DstOffsetsBuffer + { + Element0 = new Offset3D(0, 0, 0), + Element1 = new Offset3D(Size.Width, Size.Height, 1), + }, + SrcSubresource = new ImageSubresourceLayers + { + AspectMask = ImageAspectFlags.ImageAspectColorBit, + BaseArrayLayer = 0, + LayerCount = 1, + MipLevel = 0 + }, + DstSubresource = new ImageSubresourceLayers + { + AspectMask = ImageAspectFlags.ImageAspectColorBit, + BaseArrayLayer = 0, + LayerCount = 1, + MipLevel = 0 + } + }; + + _device.Api.CmdBlitImage(commandBuffer, renderTarget.Image.InternalHandle.Value, + ImageLayout.TransferSrcOptimal, + _swapchainImages[_nextImage], + ImageLayout.TransferDstOptimal, + 1, + srcBlitRegion, + Filter.Linear); + + VulkanMemoryHelper.TransitionLayout(_device, commandBuffer, + renderTarget.Image.InternalHandle.Value, ImageLayout.TransferSrcOptimal, + AccessFlags.AccessTransferReadBit, + (ImageLayout)renderTarget.Image.CurrentLayout, + AccessFlags.AccessNoneKhr, + renderTarget.MipLevels); + } + + internal unsafe void EndPresentation(VulkanCommandBufferPool.VulkanCommandBuffer commandBuffer) + { + VulkanMemoryHelper.TransitionLayout(_device, commandBuffer.InternalHandle, + _swapchainImages[_nextImage], ImageLayout.TransferDstOptimal, + AccessFlags.AccessNoneKhr, + ImageLayout.PresentSrcKhr, + AccessFlags.AccessNoneKhr, + 1); + + commandBuffer.Submit( + new[] { _semaphorePair.ImageAvailableSemaphore }, + new[] 
{ PipelineStageFlags.PipelineStageColorAttachmentOutputBit }, + new[] { _semaphorePair.RenderFinishedSemaphore }); + + var semaphore = _semaphorePair.RenderFinishedSemaphore; + var swapchain = _swapchain; + var nextImage = _nextImage; + + Result result; + + var presentInfo = new PresentInfoKHR + { + SType = StructureType.PresentInfoKhr, + WaitSemaphoreCount = 1, + PWaitSemaphores = &semaphore, + SwapchainCount = 1, + PSwapchains = &swapchain, + PImageIndices = &nextImage, + PResults = &result + }; + + lock (_device.Lock) + { + _swapchainExtension.QueuePresent(_device.PresentQueue.InternalHandle, presentInfo); + } + + CommandBufferPool.FreeUsedCommandBuffers(); + } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanImage.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanImage.cs new file mode 100644 index 000000000..343ba7605 --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanImage.cs @@ -0,0 +1,167 @@ +using System; +using Avalonia; +using Silk.NET.Vulkan; + +namespace Ryujinx.Ava.Ui.Vulkan +{ + internal class VulkanImage : IDisposable + { + private readonly VulkanDevice _device; + private readonly VulkanPhysicalDevice _physicalDevice; + private readonly VulkanCommandBufferPool _commandBufferPool; + private ImageLayout _currentLayout; + private AccessFlags _currentAccessFlags; + private ImageUsageFlags _imageUsageFlags { get; } + private ImageView? _imageView { get; set; } + private DeviceMemory _imageMemory { get; set; } + + internal Image? InternalHandle { get; private set; } + internal Format Format { get; } + internal ImageAspectFlags AspectFlags { get; private set; } + + public ulong Handle => InternalHandle?.Handle ?? 0; + public ulong ViewHandle => _imageView?.Handle ?? 
0; + public uint UsageFlags => (uint)_imageUsageFlags; + public ulong MemoryHandle => _imageMemory.Handle; + public uint MipLevels { get; private set; } + public PixelSize Size { get; } + public ulong MemorySize { get; private set; } + public uint CurrentLayout => (uint)_currentLayout; + + public VulkanImage( + VulkanDevice device, + VulkanPhysicalDevice physicalDevice, + VulkanCommandBufferPool commandBufferPool, + uint format, + PixelSize size, + uint mipLevels = 0) + { + _device = device; + _physicalDevice = physicalDevice; + _commandBufferPool = commandBufferPool; + Format = (Format)format; + Size = size; + MipLevels = mipLevels; + _imageUsageFlags = + ImageUsageFlags.ImageUsageColorAttachmentBit | ImageUsageFlags.ImageUsageTransferDstBit | + ImageUsageFlags.ImageUsageTransferSrcBit | ImageUsageFlags.ImageUsageSampledBit; + + Initialize(); + } + + public unsafe void Initialize() + { + if (!InternalHandle.HasValue) + { + MipLevels = MipLevels != 0 ? MipLevels : (uint)Math.Floor(Math.Log(Math.Max(Size.Width, Size.Height), 2)); + + var imageCreateInfo = new ImageCreateInfo + { + SType = StructureType.ImageCreateInfo, + ImageType = ImageType.ImageType2D, + Format = Format, + Extent = new Extent3D((uint?)Size.Width, (uint?)Size.Height, 1), + MipLevels = MipLevels, + ArrayLayers = 1, + Samples = SampleCountFlags.SampleCount1Bit, + Tiling = Tiling, + Usage = _imageUsageFlags, + SharingMode = SharingMode.Exclusive, + InitialLayout = ImageLayout.Undefined, + Flags = ImageCreateFlags.ImageCreateMutableFormatBit + }; + + _device.Api.CreateImage(_device.InternalHandle, imageCreateInfo, null, out var image).ThrowOnError(); + InternalHandle = image; + + _device.Api.GetImageMemoryRequirements(_device.InternalHandle, InternalHandle.Value, + out var memoryRequirements); + + var memoryAllocateInfo = new MemoryAllocateInfo + { + SType = StructureType.MemoryAllocateInfo, + AllocationSize = memoryRequirements.Size, + MemoryTypeIndex = 
(uint)VulkanMemoryHelper.FindSuitableMemoryTypeIndex( + _physicalDevice, + memoryRequirements.MemoryTypeBits, MemoryPropertyFlags.MemoryPropertyDeviceLocalBit) + }; + + /* Allocation and binding failures must surface here, like the image and view creation around them, rather than leaving the image without backing memory. */ + _device.Api.AllocateMemory(_device.InternalHandle, memoryAllocateInfo, null, + out var imageMemory).ThrowOnError(); + + _imageMemory = imageMemory; + + _device.Api.BindImageMemory(_device.InternalHandle, InternalHandle.Value, _imageMemory, 0).ThrowOnError(); + + MemorySize = memoryRequirements.Size; + + var componentMapping = new ComponentMapping( + ComponentSwizzle.Identity, + ComponentSwizzle.Identity, + ComponentSwizzle.Identity, + ComponentSwizzle.Identity); + + AspectFlags = ImageAspectFlags.ImageAspectColorBit; + + /* The view spans every mip level of the single array layer. */ + var subresourceRange = new ImageSubresourceRange(AspectFlags, 0, MipLevels, 0, 1); + + var imageViewCreateInfo = new ImageViewCreateInfo + { + SType = StructureType.ImageViewCreateInfo, + Image = InternalHandle.Value, + ViewType = ImageViewType.ImageViewType2D, + Format = Format, + Components = componentMapping, + SubresourceRange = subresourceRange + }; + + _device.Api + .CreateImageView(_device.InternalHandle, imageViewCreateInfo, null, out var imageView) + .ThrowOnError(); + + _imageView = imageView; + + _currentLayout = ImageLayout.Undefined; + + TransitionLayout(ImageLayout.ColorAttachmentOptimal, AccessFlags.AccessNoneKhr); + } + } + + public ImageTiling Tiling => ImageTiling.Optimal; + + /* Records and submits a one-off command buffer that moves the whole image to destinationLayout, then stores the new layout and access flags as the current ones. */ + internal void TransitionLayout(ImageLayout destinationLayout, AccessFlags destinationAccessFlags) + { + var commandBuffer = _commandBufferPool.CreateCommandBuffer(); + commandBuffer.BeginRecording(); + + VulkanMemoryHelper.TransitionLayout(_device, commandBuffer.InternalHandle, InternalHandle.Value, + _currentLayout, + _currentAccessFlags, + destinationLayout, destinationAccessFlags, + MipLevels); + + commandBuffer.EndRecording(); + + commandBuffer.Submit(); + + _currentLayout = destinationLayout; + _currentAccessFlags = destinationAccessFlags; + } + + public void TransitionLayout(uint destinationLayout, uint 
destinationAccessFlags) + { + TransitionLayout((ImageLayout)destinationLayout, (AccessFlags)destinationAccessFlags); + } + + /* Destroys the image view, the image and its memory, in that order. Each handle is cleared once released, so calling Dispose again is a no-op instead of throwing on Nullable.Value. */ + public unsafe void Dispose() + { + if (_imageView.HasValue) + { + _device.Api.DestroyImageView(_device.InternalHandle, _imageView.Value, null); + _imageView = default; + } + + if (InternalHandle.HasValue) + { + _device.Api.DestroyImage(_device.InternalHandle, InternalHandle.Value, null); + InternalHandle = default; + } + + if (_imageMemory.Handle != 0) + { + _device.Api.FreeMemory(_device.InternalHandle, _imageMemory, null); + _imageMemory = default; + } + } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanInstance.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanInstance.cs new file mode 100644 index 000000000..910b0dd06 --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanInstance.cs @@ -0,0 +1,138 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices; +using Silk.NET.Core; +using Silk.NET.Vulkan; +using Silk.NET.Vulkan.Extensions.EXT; + +namespace Ryujinx.Ava.Ui.Vulkan +{ + public class VulkanInstance : IDisposable + { + private const string EngineName = "Avalonia Vulkan"; + + private VulkanInstance(Instance apiHandle, Vk api) + { + InternalHandle = apiHandle; + Api = api; + } + + public IntPtr Handle => InternalHandle.Handle; + + internal Instance InternalHandle { get; } + public Vk Api { get; } + + /* Instance extensions that are always enabled: VK_KHR_surface plus the platform surface extension on Linux (xlib) or Windows (win32). */ + internal static IList<string> RequiredInstanceExtensions + { + get + { + var extensions = new List<string> { "VK_KHR_surface" }; + + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + { + extensions.Add("VK_KHR_xlib_surface"); + } + else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + extensions.Add("VK_KHR_win32_surface"); + } + + return extensions; + } + } + + public unsafe void Dispose() + { + Api?.DestroyInstance(InternalHandle, null); + Api?.Dispose(); + } + + internal static unsafe VulkanInstance Create(VulkanOptions options) + { + var api = Vk.GetApi(); + var applicationName = Marshal.StringToHGlobalAnsi(options.ApplicationName); + var engineName = 
Marshal.StringToHGlobalAnsi(EngineName); + var enabledExtensions = new List(options.InstanceExtensions); + + enabledExtensions.AddRange(RequiredInstanceExtensions); + + var applicationInfo = new ApplicationInfo + { + PApplicationName = (byte*)applicationName, + ApiVersion = new Version32((uint)options.VulkanVersion.Major, (uint)options.VulkanVersion.Minor, + (uint)options.VulkanVersion.Build), + PEngineName = (byte*)engineName, + EngineVersion = new Version32(1, 0, 0), + ApplicationVersion = new Version32(1, 0, 0) + }; + + var enabledLayers = new HashSet(); + + if (options.UseDebug) + { + enabledExtensions.Add(ExtDebugUtils.ExtensionName); + if (IsLayerAvailable(api, "VK_LAYER_KHRONOS_validation")) + enabledLayers.Add("VK_LAYER_KHRONOS_validation"); + } + + foreach (var layer in options.EnabledLayers) + enabledLayers.Add(layer); + + var ppEnabledExtensions = stackalloc IntPtr[enabledExtensions.Count]; + var ppEnabledLayers = stackalloc IntPtr[enabledLayers.Count]; + + for (var i = 0; i < enabledExtensions.Count; i++) + ppEnabledExtensions[i] = Marshal.StringToHGlobalAnsi(enabledExtensions[i]); + + var layers = enabledLayers.ToList(); + + for (var i = 0; i < enabledLayers.Count; i++) + ppEnabledLayers[i] = Marshal.StringToHGlobalAnsi(layers[i]); + + var instanceCreateInfo = new InstanceCreateInfo + { + SType = StructureType.InstanceCreateInfo, + PApplicationInfo = &applicationInfo, + PpEnabledExtensionNames = (byte**)ppEnabledExtensions, + PpEnabledLayerNames = (byte**)ppEnabledLayers, + EnabledExtensionCount = (uint)enabledExtensions.Count, + EnabledLayerCount = (uint)enabledLayers.Count + }; + + api.CreateInstance(in instanceCreateInfo, null, out var instance).ThrowOnError(); + + Marshal.FreeHGlobal(applicationName); + Marshal.FreeHGlobal(engineName); + + for (var i = 0; i < enabledExtensions.Count; i++) Marshal.FreeHGlobal(ppEnabledExtensions[i]); + + for (var i = 0; i < enabledLayers.Count; i++) Marshal.FreeHGlobal(ppEnabledLayers[i]); + + return new 
VulkanInstance(instance, api); + } + + private static unsafe bool IsLayerAvailable(Vk api, string layerName) + { + uint layerPropertiesCount; + + api.EnumerateInstanceLayerProperties(&layerPropertiesCount, null).ThrowOnError(); + + var layerProperties = new LayerProperties[layerPropertiesCount]; + + fixed (LayerProperties* pLayerProperties = layerProperties) + { + api.EnumerateInstanceLayerProperties(&layerPropertiesCount, layerProperties).ThrowOnError(); + + for (var i = 0; i < layerPropertiesCount; i++) + { + var currentLayerName = Marshal.PtrToStringAnsi((IntPtr)pLayerProperties[i].LayerName); + + if (currentLayerName == layerName) return true; + } + } + + return false; + } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanMemoryHelper.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanMemoryHelper.cs new file mode 100644 index 000000000..a70525920 --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanMemoryHelper.cs @@ -0,0 +1,59 @@ +using Silk.NET.Vulkan; + +namespace Ryujinx.Ava.Ui.Vulkan +{ + internal static class VulkanMemoryHelper + { + internal static int FindSuitableMemoryTypeIndex(VulkanPhysicalDevice physicalDevice, uint memoryTypeBits, + MemoryPropertyFlags flags) + { + physicalDevice.Api.GetPhysicalDeviceMemoryProperties(physicalDevice.InternalHandle, out var properties); + + for (var i = 0; i < properties.MemoryTypeCount; i++) + { + var type = properties.MemoryTypes[i]; + + if ((memoryTypeBits & (1 << i)) != 0 && type.PropertyFlags.HasFlag(flags)) return i; + } + + return -1; + } + + internal static unsafe void TransitionLayout(VulkanDevice device, + CommandBuffer commandBuffer, + Image image, + ImageLayout sourceLayout, + AccessFlags sourceAccessMask, + ImageLayout destinationLayout, + AccessFlags destinationAccessMask, + uint mipLevels) + { + var subresourceRange = new ImageSubresourceRange(ImageAspectFlags.ImageAspectColorBit, 0, mipLevels, 0, 1); + + var barrier = new ImageMemoryBarrier + { + SType = StructureType.ImageMemoryBarrier, + 
SrcAccessMask = sourceAccessMask, + DstAccessMask = destinationAccessMask, + OldLayout = sourceLayout, + NewLayout = destinationLayout, + SrcQueueFamilyIndex = Vk.QueueFamilyIgnored, + DstQueueFamilyIndex = Vk.QueueFamilyIgnored, + Image = image, + SubresourceRange = subresourceRange + }; + + device.Api.CmdPipelineBarrier( + commandBuffer, + PipelineStageFlags.PipelineStageAllCommandsBit, + PipelineStageFlags.PipelineStageAllCommandsBit, + 0, + 0, + null, + 0, + null, + 1, + barrier); + } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanOptions.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanOptions.cs new file mode 100644 index 000000000..b4a060c99 --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanOptions.cs @@ -0,0 +1,48 @@ +using System; +using System.Collections.Generic; + +namespace Ryujinx.Ava.Ui.Vulkan +{ + public class VulkanOptions + { + /// + /// Sets the application name of the Vulkan instance + /// + public string ApplicationName { get; set; } + + /// + /// Specifies the Vulkan API version to use + /// + public Version VulkanVersion{ get; set; } = new Version(1, 1, 0); + + /// + /// Specifies additional extensions to enable if available on the instance + /// + public IList InstanceExtensions { get; set; } = new List(); + + /// + /// Specifies layers to enable if available on the instance + /// + public IList EnabledLayers { get; set; } = new List(); + + /// + /// Enables the debug layer + /// + public bool UseDebug { get; set; } + + /// + /// Selects the first suitable discrete GPU available + /// + public bool PreferDiscreteGpu { get; set; } + + /// + /// Sets the device to use if available and suitable. 
+ /// + public string PreferredDevice { get; set; } + + /// + /// Max number of device queues to request + /// + public uint MaxQueueCount { get; set; } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanPhysicalDevice.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanPhysicalDevice.cs new file mode 100644 index 000000000..df677e718 --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanPhysicalDevice.cs @@ -0,0 +1,203 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices; +using Ryujinx.Graphics.Vulkan; +using Silk.NET.Core; +using Silk.NET.Vulkan; +using Silk.NET.Vulkan.Extensions.KHR; + +namespace Ryujinx.Ava.Ui.Vulkan +{ + public unsafe class VulkanPhysicalDevice + { + private VulkanPhysicalDevice(PhysicalDevice apiHandle, Vk api, uint queueCount, uint queueFamilyIndex) + { + InternalHandle = apiHandle; + Api = api; + QueueCount = queueCount; + QueueFamilyIndex = queueFamilyIndex; + + api.GetPhysicalDeviceProperties(apiHandle, out var properties); + + DeviceName = Marshal.PtrToStringAnsi((IntPtr)properties.DeviceName); + + var version = (Version32)properties.ApiVersion; + ApiVersion = new Version((int)version.Major, (int)version.Minor, 0, (int)version.Patch); + } + + internal PhysicalDevice InternalHandle { get; } + internal Vk Api { get; } + public uint QueueCount { get; } + public uint QueueFamilyIndex { get; } + public IntPtr Handle => InternalHandle.Handle; + + public string DeviceName { get; } + public Version ApiVersion { get; } + + internal static unsafe VulkanPhysicalDevice FindSuitablePhysicalDevice(VulkanInstance instance, + VulkanSurface surface, bool preferDiscreteGpu, string preferredDevice) + { + uint physicalDeviceCount; + + instance.Api.EnumeratePhysicalDevices(instance.InternalHandle, &physicalDeviceCount, null).ThrowOnError(); + + var physicalDevices = new PhysicalDevice[physicalDeviceCount]; + + fixed (PhysicalDevice* pPhysicalDevices = physicalDevices) + { + 
instance.Api.EnumeratePhysicalDevices(instance.InternalHandle, &physicalDeviceCount, pPhysicalDevices) + .ThrowOnError(); + } + + var physicalDeviceProperties = new Dictionary<PhysicalDevice, PhysicalDeviceProperties>(); + + foreach (var physicalDevice in physicalDevices) + { + instance.Api.GetPhysicalDeviceProperties(physicalDevice, out var properties); + physicalDeviceProperties.Add(physicalDevice, properties); + } + + /* First choice: the device whose vendor/device id pair matches preferredDevice, if it is suitable. */ + if (!string.IsNullOrWhiteSpace(preferredDevice)) + { + var physicalDevice = physicalDeviceProperties.FirstOrDefault(x => VulkanInitialization.StringFromIdPair(x.Value.VendorID, x.Value.DeviceID) == preferredDevice); + if (physicalDevice.Key.Handle != 0 && IsSuitableDevice(instance.Api, physicalDevice.Key, + physicalDevice.Value, surface.ApiHandle, out var queueCount, + out var queueFamilyIndex)) + return new VulkanPhysicalDevice(physicalDevice.Key, instance.Api, queueCount, queueFamilyIndex); + } + + /* Second choice: the first suitable discrete GPU. */ + if (preferDiscreteGpu) + { + /* Snapshot the matches so that removing rejected devices below does not mutate the dictionary while it is being enumerated. */ + var discreteGpus = physicalDeviceProperties.Where(p => p.Value.DeviceType == PhysicalDeviceType.DiscreteGpu).ToList(); + + foreach (var gpu in discreteGpus) + { + if (IsSuitableDevice( + instance.Api, + gpu.Key, + gpu.Value, + surface.ApiHandle, + out var queueCount, + out var queueFamilyIndex)) + { + return new VulkanPhysicalDevice(gpu.Key, instance.Api, queueCount, queueFamilyIndex); + } + + /* Already rejected: do not test it again in the fallback pass. */ + physicalDeviceProperties.Remove(gpu.Key); + } + } + + /* Fallback: the first suitable device of any remaining kind. */ + foreach (var physicalDevice in physicalDeviceProperties) + if (IsSuitableDevice( + instance.Api, + physicalDevice.Key, + physicalDevice.Value, + surface.ApiHandle, + out var queueCount, + out var queueFamilyIndex)) + { + return new VulkanPhysicalDevice(physicalDevice.Key, instance.Api, queueCount, queueFamilyIndex); + } + + throw new Exception("No suitable physical device found"); + } + + private static unsafe bool IsSuitableDevice(Vk api, PhysicalDevice physicalDevice, PhysicalDeviceProperties properties, SurfaceKHR surface, + out uint queueCount, out uint familyIndex) + { + queueCount = 0; + familyIndex = 0; + + if 
(properties.DeviceType == PhysicalDeviceType.Cpu) return false; + + var extensionMatches = 0; + uint propertiesCount; + + api.EnumerateDeviceExtensionProperties(physicalDevice, (byte*)null, &propertiesCount, null).ThrowOnError(); + + var extensionProperties = new ExtensionProperties[propertiesCount]; + + fixed (ExtensionProperties* pExtensionProperties = extensionProperties) + { + api.EnumerateDeviceExtensionProperties( + physicalDevice, + (byte*)null, + &propertiesCount, + pExtensionProperties).ThrowOnError(); + + for (var i = 0; i < propertiesCount; i++) + { + var extensionName = Marshal.PtrToStringAnsi((IntPtr)pExtensionProperties[i].ExtensionName); + + if (VulkanInitialization.RequiredExtensions.Contains(extensionName)) + { + extensionMatches++; + } + } + } + + if (extensionMatches == VulkanInitialization.RequiredExtensions.Length) + { + familyIndex = FindSuitableQueueFamily(api, physicalDevice, surface, out queueCount); + + return familyIndex != uint.MaxValue; + } + + return false; + } + + internal unsafe string[] GetSupportedExtensions() + { + uint propertiesCount; + + Api.EnumerateDeviceExtensionProperties(InternalHandle, (byte*)null, &propertiesCount, null).ThrowOnError(); + + var extensionProperties = new ExtensionProperties[propertiesCount]; + + fixed (ExtensionProperties* pExtensionProperties = extensionProperties) + { + Api.EnumerateDeviceExtensionProperties(InternalHandle, (byte*)null, &propertiesCount, pExtensionProperties) + .ThrowOnError(); + } + + return extensionProperties.Select(x => Marshal.PtrToStringAnsi((IntPtr)x.ExtensionName)).ToArray(); + } + + private static unsafe uint FindSuitableQueueFamily(Vk api, PhysicalDevice physicalDevice, SurfaceKHR surface, + out uint queueCount) + { + const QueueFlags RequiredFlags = QueueFlags.QueueGraphicsBit | QueueFlags.QueueComputeBit; + + var khrSurface = new KhrSurface(api.Context); + + uint propertiesCount; + + api.GetPhysicalDeviceQueueFamilyProperties(physicalDevice, &propertiesCount, null); + + var 
properties = new QueueFamilyProperties[propertiesCount]; + + fixed (QueueFamilyProperties* pProperties = properties) + { + api.GetPhysicalDeviceQueueFamilyProperties(physicalDevice, &propertiesCount, pProperties); + } + + for (uint index = 0; index < propertiesCount; index++) + { + var queueFlags = properties[index].QueueFlags; + + khrSurface.GetPhysicalDeviceSurfaceSupport(physicalDevice, index, surface, out var surfaceSupported) + .ThrowOnError(); + + if (queueFlags.HasFlag(RequiredFlags) && surfaceSupported) + { + queueCount = properties[index].QueueCount; + return index; + } + } + + queueCount = 0; + return uint.MaxValue; + } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanPlatformInterface.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanPlatformInterface.cs new file mode 100644 index 000000000..77fedae0c --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanPlatformInterface.cs @@ -0,0 +1,84 @@ +using System; +using Avalonia; +using Ryujinx.Ava.Ui.Vulkan.Surfaces; +using Silk.NET.Vulkan; +using Ryujinx.Graphics.Vulkan; + +namespace Ryujinx.Ava.Ui.Vulkan +{ + internal class VulkanPlatformInterface : IDisposable + { + private static VulkanOptions _options; + + private VulkanPlatformInterface(VulkanInstance instance) + { + Instance = instance; + Api = instance.Api; + } + + public VulkanPhysicalDevice PhysicalDevice { get; private set; } + public VulkanInstance Instance { get; } + public VulkanDevice Device { get; set; } + public Vk Api { get; private set; } + + public void Dispose() + { + Device?.Dispose(); + Instance?.Dispose(); + Api?.Dispose(); + } + + private static VulkanPlatformInterface TryCreate() + { + try + { + _options = AvaloniaLocator.Current.GetService() ?? 
new VulkanOptions(); + + var instance = VulkanInstance.Create(_options); + + return new VulkanPlatformInterface(instance); + } + catch (Exception) + { + /* Best effort: any failure is reported to the caller as a null result. */ + return null; + } + } + + /* Tries to create the platform interface and, on success, registers it with the Avalonia locator. Returns whether it succeeded. */ + public static bool TryInitialize() + { + var feature = TryCreate(); + if (feature != null) + { + AvaloniaLocator.CurrentMutable.Bind<VulkanPlatformInterface>().ToConstant(feature); + return true; + } + + return false; + } + + /* Creates a Vulkan surface for platformSurface and, on the first call, selects the physical device and creates the logical device. Returns a render target bound to that surface. If device setup fails the surface is released and the exception propagates. */ + public VulkanSurfaceRenderTarget CreateRenderTarget(IVulkanPlatformSurface platformSurface) + { + var surface = VulkanSurface.CreateSurface(Instance, platformSurface); + try + { + if (Device == null) + { + PhysicalDevice = VulkanPhysicalDevice.FindSuitablePhysicalDevice(Instance, surface, _options.PreferDiscreteGpu, _options.PreferredDevice); + var device = VulkanInitialization.CreateDevice(Instance.Api, + PhysicalDevice.InternalHandle, + PhysicalDevice.QueueFamilyIndex, + VulkanInitialization.GetSupportedExtensions(Instance.Api, PhysicalDevice.InternalHandle), + PhysicalDevice.QueueCount); + + Device = new VulkanDevice(device, PhysicalDevice, Instance.Api); + } + } + catch (Exception) + { + surface.Dispose(); + + /* Rethrow: continuing would hand out a render target built on a disposed surface and a missing device. */ + throw; + } + + return new VulkanSurfaceRenderTarget(this, surface); + } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanQueue.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanQueue.cs new file mode 100644 index 000000000..a903e21a6 --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanQueue.cs @@ -0,0 +1,18 @@ +using System; +using Silk.NET.Vulkan; + +namespace Ryujinx.Ava.Ui.Vulkan +{ + internal class VulkanQueue + { + public VulkanQueue(VulkanDevice device, Queue apiHandle) + { + Device = device; + InternalHandle = apiHandle; + } + + public VulkanDevice Device { get; } + public IntPtr Handle => InternalHandle.Handle; + internal Queue InternalHandle { get; } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanSemaphorePair.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanSemaphorePair.cs new file mode 100644 index 000000000..3b5fd9cc6 --- /dev/null +++ 
b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanSemaphorePair.cs @@ -0,0 +1,32 @@ +using System; +using Silk.NET.Vulkan; + +namespace Ryujinx.Ava.Ui.Vulkan +{ + internal class VulkanSemaphorePair : IDisposable + { + private readonly VulkanDevice _device; + + public unsafe VulkanSemaphorePair(VulkanDevice device) + { + _device = device; + + var semaphoreCreateInfo = new SemaphoreCreateInfo { SType = StructureType.SemaphoreCreateInfo }; + + _device.Api.CreateSemaphore(_device.InternalHandle, semaphoreCreateInfo, null, out var semaphore).ThrowOnError(); + ImageAvailableSemaphore = semaphore; + + _device.Api.CreateSemaphore(_device.InternalHandle, semaphoreCreateInfo, null, out semaphore).ThrowOnError(); + RenderFinishedSemaphore = semaphore; + } + + internal Semaphore ImageAvailableSemaphore { get; } + internal Semaphore RenderFinishedSemaphore { get; } + + public unsafe void Dispose() + { + _device.Api.DestroySemaphore(_device.InternalHandle, ImageAvailableSemaphore, null); + _device.Api.DestroySemaphore(_device.InternalHandle, RenderFinishedSemaphore, null); + } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanSurface.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanSurface.cs new file mode 100644 index 000000000..50b76c5ac --- /dev/null +++ b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanSurface.cs @@ -0,0 +1,77 @@ +using System; +using Avalonia; +using Ryujinx.Ava.Ui.Vulkan.Surfaces; +using Silk.NET.Vulkan; +using Silk.NET.Vulkan.Extensions.KHR; + +namespace Ryujinx.Ava.Ui.Vulkan +{ + public class VulkanSurface : IDisposable + { + private readonly VulkanInstance _instance; + private readonly IVulkanPlatformSurface _vulkanPlatformSurface; + + private VulkanSurface(IVulkanPlatformSurface vulkanPlatformSurface, VulkanInstance instance) + { + _vulkanPlatformSurface = vulkanPlatformSurface; + _instance = instance; + ApiHandle = vulkanPlatformSurface.CreateSurface(instance); + } + + internal SurfaceKHR ApiHandle { get; } + + internal static KhrSurface SurfaceExtension { get; private set; } 
+ + internal PixelSize SurfaceSize => _vulkanPlatformSurface.SurfaceSize; + + public unsafe void Dispose() + { + SurfaceExtension.DestroySurface(_instance.InternalHandle, ApiHandle, null); + _vulkanPlatformSurface.Dispose(); + } + + /* Wraps the platform surface in a VulkanSurface, loading the shared KhrSurface extension the first time it is needed. Throws InvalidOperationException when the extension cannot be loaded, instead of leaving SurfaceExtension null for later calls to dereference. */ internal static VulkanSurface CreateSurface(VulkanInstance instance, IVulkanPlatformSurface vulkanPlatformSurface) + { + if (SurfaceExtension == null) + { + if (!instance.Api.TryGetInstanceExtension(instance.InternalHandle, out KhrSurface extension)) { throw new InvalidOperationException("The VK_KHR_surface instance extension could not be loaded."); } + + SurfaceExtension = extension; + } + + return new VulkanSurface(vulkanPlatformSurface, instance); + } + + internal bool CanSurfacePresent(VulkanPhysicalDevice physicalDevice) + { + SurfaceExtension.GetPhysicalDeviceSurfaceSupport(physicalDevice.InternalHandle, physicalDevice.QueueFamilyIndex, ApiHandle, out var isSupported); + + return isSupported; + } + + /* Picks the surface format for the given physical device: B8G8R8A8Unorm with the sRGB non-linear colour space when the reported list is empty, holds a single Undefined entry, or contains that pair; otherwise the first reported format. */ internal unsafe SurfaceFormatKHR GetSurfaceFormat(VulkanPhysicalDevice physicalDevice) + { + uint surfaceFormatsCount; + + SurfaceExtension.GetPhysicalDeviceSurfaceFormats(physicalDevice.InternalHandle, ApiHandle, + &surfaceFormatsCount, null); + + var surfaceFormats = new SurfaceFormatKHR[surfaceFormatsCount]; + + fixed (SurfaceFormatKHR* pSurfaceFormats = surfaceFormats) + { + SurfaceExtension.GetPhysicalDeviceSurfaceFormats(physicalDevice.InternalHandle, ApiHandle, + &surfaceFormatsCount, pSurfaceFormats); + } + + /* An empty list takes the same fallback as the single Undefined entry, so surfaceFormats[0] below is never read from an empty array. */ if (surfaceFormats.Length == 0 || (surfaceFormats.Length == 1 && surfaceFormats[0].Format == Format.Undefined)) + return new SurfaceFormatKHR(Format.B8G8R8A8Unorm, ColorSpaceKHR.ColorspaceSrgbNonlinearKhr); + foreach (var format in surfaceFormats) + if (format.Format == Format.B8G8R8A8Unorm && + format.ColorSpace == ColorSpaceKHR.ColorspaceSrgbNonlinearKhr) + return format; + + return surfaceFormats[0]; + } + } +} diff --git a/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanSurfaceRenderingSession.cs b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanSurfaceRenderingSession.cs new file mode 100644 index 000000000..87267cd0b --- /dev/null +++ 
b/Ryujinx.Ava/Ui/Backend/Vulkan/VulkanSurfaceRenderingSession.cs @@ -0,0 +1,48 @@ +using System; +using Avalonia; +using Ryujinx.Ava.Ui.Vulkan.Surfaces; +using Silk.NET.Vulkan; + +namespace Ryujinx.Ava.Ui.Vulkan +{ + internal class VulkanSurfaceRenderingSession : IDisposable + { + private readonly VulkanDevice _device; + private readonly VulkanSurfaceRenderTarget _renderTarget; + private VulkanCommandBufferPool.VulkanCommandBuffer _commandBuffer; + + public VulkanSurfaceRenderingSession(VulkanDisplay display, VulkanDevice device, + VulkanSurfaceRenderTarget renderTarget, float scaling) + { + Display = display; + _device = device; + _renderTarget = renderTarget; + Scaling = scaling; + Begin(); + } + + public VulkanDisplay Display { get; } + + public PixelSize Size => _renderTarget.Size; + public Vk Api => _device.Api; + + public float Scaling { get; } + + public bool IsYFlipped { get; } = true; + + public void Dispose() + { + _commandBuffer = Display.StartPresentation(_renderTarget); + + Display.BlitImageToCurrentImage(_renderTarget, _commandBuffer.InternalHandle); + + Display.EndPresentation(_commandBuffer); + } + + private void Begin() + { + if (!Display.EnsureSwapchainAvailable()) + _renderTarget.Invalidate(); + } + } +} diff --git a/Ryujinx.Ava/Ui/Controls/OpenGLRendererControl.cs b/Ryujinx.Ava/Ui/Controls/OpenGLRendererControl.cs new file mode 100644 index 000000000..b25c0621b --- /dev/null +++ b/Ryujinx.Ava/Ui/Controls/OpenGLRendererControl.cs @@ -0,0 +1,192 @@ +using Avalonia; +using Avalonia.OpenGL; +using Avalonia.Platform; +using Avalonia.Rendering.SceneGraph; +using Avalonia.Skia; +using Avalonia.Threading; +using OpenTK.Graphics.OpenGL; +using Ryujinx.Common.Configuration; +using SkiaSharp; +using SPB.Graphics; +using SPB.Graphics.OpenGL; +using SPB.Platform; +using SPB.Windowing; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Ryujinx.Ava.Ui.Controls +{ + internal 
class OpenGLRendererControl : RendererControl + { + public int Major { get; } + public int Minor { get; } + public OpenGLContextBase GameContext { get; set; } + + public static OpenGLContextBase PrimaryContext => AvaloniaLocator.Current.GetService().PrimaryContext.AsOpenGLContextBase(); + + private SwappableNativeWindowBase _gameBackgroundWindow; + + private IntPtr _fence; + + public OpenGLRendererControl(int major, int minor, GraphicsDebugLevel graphicsDebugLevel) : base(graphicsDebugLevel) + { + Major = major; + Minor = minor; + } + + public override void DestroyBackgroundContext() + { + _image = null; + + if (_fence != IntPtr.Zero) + { + DrawOperation.Dispose(); + GL.DeleteSync(_fence); + } + + GlDrawOperation.DeleteFramebuffer(); + + GameContext?.Dispose(); + + _gameBackgroundWindow?.Dispose(); + } + + internal override void Present(object image) + { + Dispatcher.UIThread.InvokeAsync(() => + { + Image = (int)image; + }).Wait(); + + if (_fence != IntPtr.Zero) + { + GL.DeleteSync(_fence); + } + + _fence = GL.FenceSync(SyncCondition.SyncGpuCommandsComplete, WaitSyncFlags.None); + + QueueRender(); + + _gameBackgroundWindow.SwapBuffers(); + } + + internal override void MakeCurrent() + { + GameContext.MakeCurrent(_gameBackgroundWindow); + } + + internal override void MakeCurrent(SwappableNativeWindowBase window) + { + GameContext.MakeCurrent(window); + } + + protected override void CreateWindow() + { + var flags = OpenGLContextFlags.Compat; + if (DebugLevel != GraphicsDebugLevel.None) + { + flags |= OpenGLContextFlags.Debug; + } + _gameBackgroundWindow = PlatformHelper.CreateOpenGLWindow(FramebufferFormat.Default, 0, 0, 100, 100); + _gameBackgroundWindow.Hide(); + + GameContext = PlatformHelper.CreateOpenGLContext(FramebufferFormat.Default, Major, Minor, flags, shareContext: PrimaryContext); + GameContext.Initialize(_gameBackgroundWindow); + } + + protected override ICustomDrawOperation CreateDrawOperation() + { + return new GlDrawOperation(this); + } + + private 
class GlDrawOperation : ICustomDrawOperation + { + private static int _framebuffer; + + public Rect Bounds { get; } + + private readonly OpenGLRendererControl _control; + + public GlDrawOperation(OpenGLRendererControl control) + { + _control = control; + Bounds = _control.Bounds; + } + + public void Dispose() { } + + /* Deletes the shared framebuffer object if one was generated, then clears the handle so the next Render call generates a new one. The guard used to be `== 0`, which skipped the delete whenever a framebuffer actually existed. NOTE(review): confirm the GL context current at the call site is the one Render used to generate the framebuffer. */ public static void DeleteFramebuffer() + { + if (_framebuffer != 0) + { + GL.DeleteFramebuffer(_framebuffer); + } + + _framebuffer = 0; + } + + public bool Equals(ICustomDrawOperation other) + { + return other is GlDrawOperation operation && Equals(this, operation) && operation.Bounds == Bounds; + } + + public bool HitTest(Point p) + { + return Bounds.Contains(p); + } + + private void CreateRenderTarget() + { + _framebuffer = GL.GenFramebuffer(); + } + + public void Render(IDrawingContextImpl context) + { + if (_control.Image == null) + { + return; + } + + if (_framebuffer == 0) + { + CreateRenderTarget(); + } + + int currentFramebuffer = GL.GetInteger(GetPName.FramebufferBinding); + + var image = _control.Image; + var fence = _control._fence; + + GL.BindFramebuffer(FramebufferTarget.Framebuffer, _framebuffer); + GL.FramebufferTexture2D(FramebufferTarget.Framebuffer, FramebufferAttachment.ColorAttachment0, TextureTarget.Texture2D, (int)image, 0); + GL.BindFramebuffer(FramebufferTarget.Framebuffer, currentFramebuffer); + + if (context is not ISkiaDrawingContextImpl skiaDrawingContextImpl) + { + return; + } + + var imageInfo = new SKImageInfo((int)_control.RenderSize.Width, (int)_control.RenderSize.Height, SKColorType.Rgba8888); + var glInfo = new GRGlFramebufferInfo((uint)_framebuffer, SKColorType.Rgba8888.ToGlSizedFormat()); + + GL.WaitSync(fence, WaitSyncFlags.None, ulong.MaxValue); + + using var backendTexture = new GRBackendRenderTarget(imageInfo.Width, imageInfo.Height, 1, 0, glInfo); + using var surface = SKSurface.Create(skiaDrawingContextImpl.GrContext, backendTexture, GRSurfaceOrigin.BottomLeft, SKColorType.Rgba8888); + + if (surface == null) + 
{ + return; + } + + var rect = new Rect(new Point(), _control.RenderSize); + + using var snapshot = surface.Snapshot(); + skiaDrawingContextImpl.SkCanvas.DrawImage(snapshot, rect.ToSKRect(), _control.Bounds.ToSKRect(), new SKPaint()); + } + } + } +} diff --git a/Ryujinx.Ava/Ui/Controls/RendererControl.cs b/Ryujinx.Ava/Ui/Controls/RendererControl.cs index a728946cf..2239a5824 100644 --- a/Ryujinx.Ava/Ui/Controls/RendererControl.cs +++ b/Ryujinx.Ava/Ui/Controls/RendererControl.cs @@ -2,65 +2,45 @@ using Avalonia.Controls; using Avalonia.Data; using Avalonia.Media; -using Avalonia.OpenGL; -using Avalonia.Platform; using Avalonia.Rendering.SceneGraph; -using Avalonia.Skia; -using Avalonia.Threading; -using OpenTK.Graphics.OpenGL; using Ryujinx.Common.Configuration; -using SkiaSharp; -using SPB.Graphics; -using SPB.Graphics.OpenGL; -using SPB.Platform; using SPB.Windowing; using System; namespace Ryujinx.Ava.Ui.Controls { - public class RendererControl : Control + public abstract class RendererControl : Control { - private int _image; + protected object _image; static RendererControl() { AffectsRender(ImageProperty); } - public readonly static StyledProperty ImageProperty = - AvaloniaProperty.Register(nameof(Image), 0, inherits: true, defaultBindingMode: BindingMode.TwoWay); + public readonly static StyledProperty ImageProperty = + AvaloniaProperty.Register(nameof(Image), 0, inherits: true, defaultBindingMode: BindingMode.TwoWay); - protected int Image + protected object Image { get => _image; set => SetAndRaise(ImageProperty, ref _image, value); } - public event EventHandler GlInitialized; + public event EventHandler RendererInitialized; public event EventHandler SizeChanged; protected Size RenderSize { get; private set; } public bool IsStarted { get; private set; } - public int Major { get; } - public int Minor { get; } public GraphicsDebugLevel DebugLevel { get; } - public OpenGLContextBase GameContext { get; set; } - - public static OpenGLContextBase PrimaryContext 
=> AvaloniaLocator.Current.GetService().PrimaryContext.AsOpenGLContextBase(); - - private SwappableNativeWindowBase _gameBackgroundWindow; private bool _isInitialized; - private IntPtr _fence; + protected ICustomDrawOperation DrawOperation { get; private set; } - private GlDrawOperation _glDrawOperation; - - public RendererControl(int major, int minor, GraphicsDebugLevel graphicsDebugLevel) + public RendererControl(GraphicsDebugLevel graphicsDebugLevel) { - Major = major; - Minor = minor; DebugLevel = graphicsDebugLevel; IObservable resizeObservable = this.GetObservable(BoundsProperty); @@ -69,7 +49,7 @@ namespace Ryujinx.Ava.Ui.Controls Focusable = true; } - private void Resized(Rect rect) + protected void Resized(Rect rect) { SizeChanged?.Invoke(this, rect.Size); @@ -77,37 +57,40 @@ namespace Ryujinx.Ava.Ui.Controls { RenderSize = rect.Size * VisualRoot.RenderScaling; - _glDrawOperation?.Dispose(); - _glDrawOperation = new GlDrawOperation(this); + DrawOperation?.Dispose(); + DrawOperation = CreateDrawOperation(); } } + protected abstract ICustomDrawOperation CreateDrawOperation(); + protected abstract void CreateWindow(); + public override void Render(DrawingContext context) { if (!_isInitialized) { CreateWindow(); - OnGlInitialized(); + OnRendererInitialized(); _isInitialized = true; } - if (GameContext == null || !IsStarted || Image == 0) + if (!IsStarted || Image == null) { return; } - if (_glDrawOperation != null) + if (DrawOperation != null) { - context.Custom(_glDrawOperation); + context.Custom(DrawOperation); } base.Render(context); } - protected void OnGlInitialized() + protected void OnRendererInitialized() { - GlInitialized?.Invoke(this, EventArgs.Empty); + RendererInitialized?.Invoke(this, EventArgs.Empty); } public void QueueRender() @@ -115,24 +98,7 @@ namespace Ryujinx.Ava.Ui.Controls Program.RenderTimer.TickNow(); } - internal void Present(object image) - { - Dispatcher.UIThread.InvokeAsync(() => - { - Image = (int)image; - }).Wait(); - - if 
(_fence != IntPtr.Zero) - { - GL.DeleteSync(_fence); - } - - _fence = GL.FenceSync(SyncCondition.SyncGpuCommandsComplete, WaitSyncFlags.None); - - QueueRender(); - - _gameBackgroundWindow.SwapBuffers(); - } + internal abstract void Present(object image); internal void Start() { @@ -145,132 +111,8 @@ namespace Ryujinx.Ava.Ui.Controls IsStarted = false; } - public void DestroyBackgroundContext() - { - _image = 0; - - if (_fence != IntPtr.Zero) - { - _glDrawOperation.Dispose(); - GL.DeleteSync(_fence); - } - - GlDrawOperation.DeleteFramebuffer(); - - GameContext?.Dispose(); - - _gameBackgroundWindow?.Dispose(); - } - - internal void MakeCurrent() - { - GameContext.MakeCurrent(_gameBackgroundWindow); - } - - internal void MakeCurrent(SwappableNativeWindowBase window) - { - GameContext.MakeCurrent(window); - } - - protected void CreateWindow() - { - var flags = OpenGLContextFlags.Compat; - if (DebugLevel != GraphicsDebugLevel.None) - { - flags |= OpenGLContextFlags.Debug; - } - _gameBackgroundWindow = PlatformHelper.CreateOpenGLWindow(FramebufferFormat.Default, 0, 0, 100, 100); - _gameBackgroundWindow.Hide(); - - GameContext = PlatformHelper.CreateOpenGLContext(FramebufferFormat.Default, Major, Minor, flags, shareContext: PrimaryContext); - GameContext.Initialize(_gameBackgroundWindow); - } - - private class GlDrawOperation : ICustomDrawOperation - { - private static int _framebuffer; - - public Rect Bounds { get; } - - private readonly RendererControl _control; - - public GlDrawOperation(RendererControl control) - { - _control = control; - Bounds = _control.Bounds; - } - - public void Dispose() { } - - public static void DeleteFramebuffer() - { - if (_framebuffer == 0) - { - GL.DeleteFramebuffer(_framebuffer); - } - - _framebuffer = 0; - } - - public bool Equals(ICustomDrawOperation other) - { - return other is GlDrawOperation operation && Equals(this, operation) && operation.Bounds == Bounds; - } - - public bool HitTest(Point p) - { - return Bounds.Contains(p); - } - 
- private void CreateRenderTarget() - { - _framebuffer = GL.GenFramebuffer(); - } - - public void Render(IDrawingContextImpl context) - { - if (_control.Image == 0) - { - return; - } - - if (_framebuffer == 0) - { - CreateRenderTarget(); - } - - int currentFramebuffer = GL.GetInteger(GetPName.FramebufferBinding); - - var image = _control.Image; - var fence = _control._fence; - - GL.BindFramebuffer(FramebufferTarget.Framebuffer, _framebuffer); - GL.FramebufferTexture2D(FramebufferTarget.Framebuffer, FramebufferAttachment.ColorAttachment0, TextureTarget.Texture2D, image, 0); - GL.BindFramebuffer(FramebufferTarget.Framebuffer, currentFramebuffer); - - if (context is not ISkiaDrawingContextImpl skiaDrawingContextImpl) - { - return; - } - - var imageInfo = new SKImageInfo((int)_control.RenderSize.Width, (int)_control.RenderSize.Height, SKColorType.Rgba8888); - var glInfo = new GRGlFramebufferInfo((uint)_framebuffer, SKColorType.Rgba8888.ToGlSizedFormat()); - - GL.WaitSync(fence, WaitSyncFlags.None, ulong.MaxValue); - - using var backendTexture = new GRBackendRenderTarget(imageInfo.Width, imageInfo.Height, 1, 0, glInfo); - using var surface = SKSurface.Create(skiaDrawingContextImpl.GrContext, backendTexture, GRSurfaceOrigin.BottomLeft, SKColorType.Rgba8888); - - if (surface == null) - { - return; - } - - var rect = new Rect(new Point(), _control.RenderSize); - - using var snapshot = surface.Snapshot(); - skiaDrawingContextImpl.SkCanvas.DrawImage(snapshot, rect.ToSKRect(), _control.Bounds.ToSKRect(), new SKPaint()); - } - } + public abstract void DestroyBackgroundContext(); + internal abstract void MakeCurrent(); + internal abstract void MakeCurrent(SwappableNativeWindowBase window); } -} +} \ No newline at end of file diff --git a/Ryujinx.Ava/Ui/Controls/VulkanRendererControl.cs b/Ryujinx.Ava/Ui/Controls/VulkanRendererControl.cs new file mode 100644 index 000000000..fdbd8df97 --- /dev/null +++ b/Ryujinx.Ava/Ui/Controls/VulkanRendererControl.cs @@ -0,0 +1,153 @@ +using 
Avalonia; +using Avalonia.Platform; +using Avalonia.Rendering.SceneGraph; +using Avalonia.Skia; +using Avalonia.Threading; +using Ryujinx.Ava.Ui.Backend.Vulkan; +using Ryujinx.Ava.Ui.Vulkan; +using Ryujinx.Common.Configuration; +using Ryujinx.Graphics.Vulkan; +using Silk.NET.Vulkan; +using SkiaSharp; +using SPB.Windowing; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Ryujinx.Ava.Ui.Controls +{ + internal class VulkanRendererControl : RendererControl + { + private VulkanPlatformInterface _platformInterface; + + public VulkanRendererControl(GraphicsDebugLevel graphicsDebugLevel) : base(graphicsDebugLevel) + { + _platformInterface = AvaloniaLocator.Current.GetService(); + } + + public override void DestroyBackgroundContext() + { + + } + + protected override ICustomDrawOperation CreateDrawOperation() + { + return new VulkanDrawOperation(this); + } + + protected override void CreateWindow() + { + } + + internal override void MakeCurrent() + { + } + + internal override void MakeCurrent(SwappableNativeWindowBase window) + { + } + + internal override void Present(object image) + { + Dispatcher.UIThread.InvokeAsync(() => + { + Image = image; + }).Wait(); + + QueueRender(); + } + + private class VulkanDrawOperation : ICustomDrawOperation + { + public Rect Bounds { get; } + + private readonly VulkanRendererControl _control; + + public VulkanDrawOperation(VulkanRendererControl control) + { + _control = control; + Bounds = _control.Bounds; + } + + public void Dispose() + { + + } + + public bool Equals(ICustomDrawOperation other) + { + return other is VulkanDrawOperation operation && Equals(this, operation) && operation.Bounds == Bounds; + } + + public bool HitTest(Point p) + { + return Bounds.Contains(p); + } + + public void Render(IDrawingContextImpl context) + { + if (_control.Image == null || _control.RenderSize.Width == 0 || _control.RenderSize.Height == 0) + { + return; + } + + var 
image = (PresentImageInfo)_control.Image; + + if (context is not ISkiaDrawingContextImpl skiaDrawingContextImpl) + { + return; + } + + _control._platformInterface.Device.QueueWaitIdle(); + + var gpu = AvaloniaLocator.Current.GetService(); + + var imageInfo = new GRVkImageInfo() + { + CurrentQueueFamily = _control._platformInterface.PhysicalDevice.QueueFamilyIndex, + Format = (uint)Format.R8G8B8A8Unorm, + Image = image.Image.Handle, + ImageLayout = (uint)ImageLayout.ColorAttachmentOptimal, + ImageTiling = (uint)ImageTiling.Optimal, + ImageUsageFlags = (uint)(ImageUsageFlags.ImageUsageColorAttachmentBit + | ImageUsageFlags.ImageUsageTransferSrcBit + | ImageUsageFlags.ImageUsageTransferDstBit), + LevelCount = 1, + SampleCount = 1, + Protected = false, + Alloc = new GRVkAlloc() + { + Memory = image.Memory.Handle, + Flags = 0, + Offset = image.MemoryOffset, + Size = image.MemorySize + } + }; + + using var backendTexture = new GRBackendRenderTarget( + (int)_control.RenderSize.Width, + (int)_control.RenderSize.Height, + 1, + imageInfo); + + using var surface = SKSurface.Create( + gpu.GrContext, + backendTexture, + GRSurfaceOrigin.TopLeft, + SKColorType.Rgba8888); + + if (surface == null) + { + return; + } + + var rect = new Rect(new Point(), _control.RenderSize); + + using var snapshot = surface.Snapshot(); + skiaDrawingContextImpl.SkCanvas.DrawImage(snapshot, rect.ToSKRect(), _control.Bounds.ToSKRect(), new SKPaint()); + } + } + } +} diff --git a/Ryujinx.Ava/Ui/Windows/MainWindow.axaml.cs b/Ryujinx.Ava/Ui/Windows/MainWindow.axaml.cs index 016fed59d..2199e46ec 100644 --- a/Ryujinx.Ava/Ui/Windows/MainWindow.axaml.cs +++ b/Ryujinx.Ava/Ui/Windows/MainWindow.axaml.cs @@ -61,7 +61,7 @@ namespace Ryujinx.Ava.Ui.Windows public AppHost AppHost { get; private set; } public InputManager InputManager { get; private set; } - public RendererControl GlRenderer { get; private set; } + public RendererControl RendererControl { get; private set; } public ContentControl ContentFrame { get; 
private set; } public TextBlock LoadStatus { get; private set; } public TextBlock FirmwareStatus { get; private set; } @@ -257,8 +257,8 @@ namespace Ryujinx.Ava.Ui.Windows _mainViewContent = ContentFrame.Content as Control; - GlRenderer = new RendererControl(3, 3, ConfigurationState.Instance.Logger.GraphicsDebugLevel); - AppHost = new AppHost(GlRenderer, InputManager, path, VirtualFileSystem, ContentManager, AccountManager, _userChannelPersistence, this); + RendererControl = Program.UseVulkan ? new VulkanRendererControl(ConfigurationState.Instance.Logger.GraphicsDebugLevel) : new OpenGLRendererControl(3, 3, ConfigurationState.Instance.Logger.GraphicsDebugLevel); + AppHost = new AppHost(RendererControl, InputManager, path, VirtualFileSystem, ContentManager, AccountManager, _userChannelPersistence, this); if (!AppHost.LoadGuestApplication().Result) { @@ -282,7 +282,7 @@ namespace Ryujinx.Ava.Ui.Windows private void InitializeGame() { - GlRenderer.GlInitialized += GlRenderer_Created; + RendererControl.RendererInitialized += GlRenderer_Created; AppHost.StatusUpdatedEvent += Update_StatusBar; AppHost.AppExit += AppHost_AppExit; @@ -322,14 +322,14 @@ namespace Ryujinx.Ava.Ui.Windows Dispatcher.UIThread.InvokeAsync(() => { - ContentFrame.Content = GlRenderer; + ContentFrame.Content = RendererControl; if (startFullscreen && WindowState != WindowState.FullScreen) { ViewModel.ToggleFullscreen(); } - GlRenderer.Focus(); + RendererControl.Focus(); }); } @@ -380,8 +380,8 @@ namespace Ryujinx.Ava.Ui.Windows HandleRelaunch(); }); - GlRenderer.GlInitialized -= GlRenderer_Created; - GlRenderer = null; + RendererControl.RendererInitialized -= GlRenderer_Created; + RendererControl = null; ViewModel.SelectedIcon = null; @@ -544,6 +544,7 @@ namespace Ryujinx.Ava.Ui.Windows GraphicsConfig.MaxAnisotropy = ConfigurationState.Instance.Graphics.MaxAnisotropy; GraphicsConfig.ShadersDumpPath = ConfigurationState.Instance.Graphics.ShadersDumpPath; GraphicsConfig.EnableShaderCache = 
ConfigurationState.Instance.Graphics.EnableShaderCache; + GraphicsConfig.EnableTextureRecompression = ConfigurationState.Instance.Graphics.EnableTextureRecompression; } public void LoadHotKeys() diff --git a/Ryujinx.Common/Configuration/GraphicsBackend.cs b/Ryujinx.Common/Configuration/GraphicsBackend.cs new file mode 100644 index 000000000..26e4a28a9 --- /dev/null +++ b/Ryujinx.Common/Configuration/GraphicsBackend.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.Common.Configuration +{ + public enum GraphicsBackend + { + Vulkan, + OpenGl + } +} diff --git a/Ryujinx.Common/Memory/StructArrayHelpers.cs b/Ryujinx.Common/Memory/StructArrayHelpers.cs index fbb2902d5..bba05c10b 100644 --- a/Ryujinx.Common/Memory/StructArrayHelpers.cs +++ b/Ryujinx.Common/Memory/StructArrayHelpers.cs @@ -640,4 +640,15 @@ namespace Ryujinx.Common.Memory public ref T this[int index] => ref ToSpan()[index]; public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 64); } + public struct Array73 : IArray where T : unmanaged + { +#pragma warning disable CS0169 + T _e0; + Array64 _other; + Array8 _other2; +#pragma warning restore CS0169 + public int Length => 73; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 73); + } } diff --git a/Ryujinx.Common/System/ForceDpiAware.cs b/Ryujinx.Common/System/ForceDpiAware.cs index f29630a62..f1aa3e8e8 100644 --- a/Ryujinx.Common/System/ForceDpiAware.cs +++ b/Ryujinx.Common/System/ForceDpiAware.cs @@ -44,7 +44,7 @@ namespace Ryujinx.Common.System } } - public static double GetWindowScaleFactor() + public static double GetActualScaleFactor() { double userDpiScale = 96.0; @@ -84,6 +84,13 @@ namespace Ryujinx.Common.System Logger.Warning?.Print(LogClass.Application, $"Couldn't determine monitor DPI: {e.Message}"); } + return userDpiScale; + } + + public static double GetWindowScaleFactor() + { + double userDpiScale = GetActualScaleFactor(); + return Math.Min(userDpiScale / _standardDpiScale, 
_maxScaleFactor); } } diff --git a/Ryujinx.Graphics.GAL/Capabilities.cs b/Ryujinx.Graphics.GAL/Capabilities.cs index af8541fb8..d7388476b 100644 --- a/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/Ryujinx.Graphics.GAL/Capabilities.cs @@ -11,11 +11,15 @@ namespace Ryujinx.Graphics.GAL public readonly bool HasVectorIndexingBug; public readonly bool SupportsAstcCompression; + public readonly bool SupportsBc123Compression; + public readonly bool SupportsBc45Compression; + public readonly bool SupportsBc67Compression; public readonly bool Supports3DTextureCompression; public readonly bool SupportsBgraFormat; public readonly bool SupportsR4G4Format; public readonly bool SupportsFragmentShaderInterlock; public readonly bool SupportsFragmentShaderOrderingIntel; + public readonly bool SupportsGeometryShaderPassthrough; public readonly bool SupportsImageLoadFormatted; public readonly bool SupportsMismatchingViewFormat; public readonly bool SupportsNonConstantTextureOffset; @@ -24,6 +28,11 @@ namespace Ryujinx.Graphics.GAL public readonly bool SupportsViewportSwizzle; public readonly bool SupportsIndirectParameters; + public readonly uint MaximumUniformBuffersPerStage; + public readonly uint MaximumStorageBuffersPerStage; + public readonly uint MaximumTexturesPerStage; + public readonly uint MaximumImagesPerStage; + public readonly int MaximumComputeSharedMemorySize; public readonly float MaximumSupportedAnisotropy; public readonly int StorageBufferOffsetAlignment; @@ -34,11 +43,15 @@ namespace Ryujinx.Graphics.GAL bool hasFrontFacingBug, bool hasVectorIndexingBug, bool supportsAstcCompression, + bool supportsBc123Compression, + bool supportsBc45Compression, + bool supportsBc67Compression, bool supports3DTextureCompression, bool supportsBgraFormat, bool supportsR4G4Format, bool supportsFragmentShaderInterlock, bool supportsFragmentShaderOrderingIntel, + bool supportsGeometryShaderPassthrough, bool supportsImageLoadFormatted, bool supportsMismatchingViewFormat, bool 
supportsNonConstantTextureOffset, @@ -46,6 +59,10 @@ namespace Ryujinx.Graphics.GAL bool supportsTextureShadowLod, bool supportsViewportSwizzle, bool supportsIndirectParameters, + uint maximumUniformBuffersPerStage, + uint maximumStorageBuffersPerStage, + uint maximumTexturesPerStage, + uint maximumImagesPerStage, int maximumComputeSharedMemorySize, float maximumSupportedAnisotropy, int storageBufferOffsetAlignment) @@ -55,11 +72,15 @@ namespace Ryujinx.Graphics.GAL HasFrontFacingBug = hasFrontFacingBug; HasVectorIndexingBug = hasVectorIndexingBug; SupportsAstcCompression = supportsAstcCompression; + SupportsBc123Compression = supportsBc123Compression; + SupportsBc45Compression = supportsBc45Compression; + SupportsBc67Compression = supportsBc67Compression; Supports3DTextureCompression = supports3DTextureCompression; SupportsBgraFormat = supportsBgraFormat; SupportsR4G4Format = supportsR4G4Format; SupportsFragmentShaderInterlock = supportsFragmentShaderInterlock; SupportsFragmentShaderOrderingIntel = supportsFragmentShaderOrderingIntel; + SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough; SupportsImageLoadFormatted = supportsImageLoadFormatted; SupportsMismatchingViewFormat = supportsMismatchingViewFormat; SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset; @@ -67,6 +88,10 @@ namespace Ryujinx.Graphics.GAL SupportsTextureShadowLod = supportsTextureShadowLod; SupportsViewportSwizzle = supportsViewportSwizzle; SupportsIndirectParameters = supportsIndirectParameters; + MaximumUniformBuffersPerStage = maximumUniformBuffersPerStage; + MaximumStorageBuffersPerStage = maximumStorageBuffersPerStage; + MaximumTexturesPerStage = maximumTexturesPerStage; + MaximumImagesPerStage = maximumImagesPerStage; MaximumComputeSharedMemorySize = maximumComputeSharedMemorySize; MaximumSupportedAnisotropy = maximumSupportedAnisotropy; StorageBufferOffsetAlignment = storageBufferOffsetAlignment; diff --git a/Ryujinx.Graphics.GAL/DeviceInfo.cs 
b/Ryujinx.Graphics.GAL/DeviceInfo.cs new file mode 100644 index 000000000..c525eb601 --- /dev/null +++ b/Ryujinx.Graphics.GAL/DeviceInfo.cs @@ -0,0 +1,18 @@ +namespace Ryujinx.Graphics.GAL +{ + public struct DeviceInfo + { + public readonly string Id; + public readonly string Vendor; + public readonly string Name; + public readonly bool IsDiscrete; + + public DeviceInfo(string id, string vendor, string name, bool isDiscrete) + { + Id = id; + Vendor = vendor; + Name = name; + IsDiscrete = isDiscrete; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.GAL/Format.cs b/Ryujinx.Graphics.GAL/Format.cs index 50cc6d40c..db944844f 100644 --- a/Ryujinx.Graphics.GAL/Format.cs +++ b/Ryujinx.Graphics.GAL/Format.cs @@ -165,6 +165,120 @@ namespace Ryujinx.Graphics.GAL public static class FormatExtensions { + /// + /// Checks if the texture format is valid to use as image format. + /// + /// Texture format + /// True if the texture can be used as image, false otherwise + public static bool IsImageCompatible(this Format format) + { + switch (format) + { + case Format.R8Unorm: + case Format.R8Snorm: + case Format.R8Uint: + case Format.R8Sint: + case Format.R16Float: + case Format.R16Unorm: + case Format.R16Snorm: + case Format.R16Uint: + case Format.R16Sint: + case Format.R32Float: + case Format.R32Uint: + case Format.R32Sint: + case Format.R8G8Unorm: + case Format.R8G8Snorm: + case Format.R8G8Uint: + case Format.R8G8Sint: + case Format.R16G16Float: + case Format.R16G16Unorm: + case Format.R16G16Snorm: + case Format.R16G16Uint: + case Format.R16G16Sint: + case Format.R32G32Float: + case Format.R32G32Uint: + case Format.R32G32Sint: + case Format.R8G8B8A8Unorm: + case Format.R8G8B8A8Snorm: + case Format.R8G8B8A8Uint: + case Format.R8G8B8A8Sint: + case Format.R16G16B16A16Float: + case Format.R16G16B16A16Unorm: + case Format.R16G16B16A16Snorm: + case Format.R16G16B16A16Uint: + case Format.R16G16B16A16Sint: + case Format.R32G32B32A32Float: + case 
Format.R32G32B32A32Uint: + case Format.R32G32B32A32Sint: + case Format.R10G10B10A2Unorm: + case Format.R10G10B10A2Uint: + case Format.R11G11B10Float: + return true; + } + + return false; + } + + /// + /// Checks if the texture format is valid to use as render target color format. + /// + /// Texture format + /// True if the texture can be used as render target, false otherwise + public static bool IsRtColorCompatible(this Format format) + { + switch (format) + { + case Format.R32G32B32A32Float: + case Format.R32G32B32A32Sint: + case Format.R32G32B32A32Uint: + case Format.R16G16B16A16Unorm: + case Format.R16G16B16A16Snorm: + case Format.R16G16B16A16Sint: + case Format.R16G16B16A16Uint: + case Format.R16G16B16A16Float: + case Format.R32G32Float: + case Format.R32G32Sint: + case Format.R32G32Uint: + case Format.B8G8R8A8Unorm: + case Format.B8G8R8A8Srgb: + case Format.R10G10B10A2Unorm: + case Format.R10G10B10A2Uint: + case Format.R8G8B8A8Unorm: + case Format.R8G8B8A8Srgb: + case Format.R8G8B8A8Snorm: + case Format.R8G8B8A8Sint: + case Format.R8G8B8A8Uint: + case Format.R16G16Unorm: + case Format.R16G16Snorm: + case Format.R16G16Sint: + case Format.R16G16Uint: + case Format.R16G16Float: + case Format.R11G11B10Float: + case Format.R32Sint: + case Format.R32Uint: + case Format.R32Float: + case Format.B5G6R5Unorm: + case Format.B5G5R5A1Unorm: + case Format.R8G8Unorm: + case Format.R8G8Snorm: + case Format.R8G8Sint: + case Format.R8G8Uint: + case Format.R16Unorm: + case Format.R16Snorm: + case Format.R16Sint: + case Format.R16Uint: + case Format.R16Float: + case Format.R8Unorm: + case Format.R8Snorm: + case Format.R8Sint: + case Format.R8Uint: + case Format.B5G5R5X1Unorm: + return true; + } + + return false; + } + /// /// Checks if the texture format is an ASTC format. 
/// diff --git a/Ryujinx.Graphics.GAL/HardwareInfo.cs b/Ryujinx.Graphics.GAL/HardwareInfo.cs new file mode 100644 index 000000000..0c247074a --- /dev/null +++ b/Ryujinx.Graphics.GAL/HardwareInfo.cs @@ -0,0 +1,18 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Ryujinx.Graphics.GAL +{ + public struct HardwareInfo + { + public string GpuVendor { get; } + public string GpuModel { get; } + + public HardwareInfo(string gpuVendor, string gpuModel) + { + GpuVendor = gpuVendor; + GpuModel = gpuModel; + } + } +} diff --git a/Ryujinx.Graphics.GAL/IPipeline.cs b/Ryujinx.Graphics.GAL/IPipeline.cs index aec096e72..8e411f132 100644 --- a/Ryujinx.Graphics.GAL/IPipeline.cs +++ b/Ryujinx.Graphics.GAL/IPipeline.cs @@ -1,3 +1,4 @@ +using Ryujinx.Graphics.Shader; using System; namespace Ryujinx.Graphics.GAL @@ -10,9 +11,10 @@ namespace Ryujinx.Graphics.GAL void ClearBuffer(BufferHandle destination, int offset, int size, uint value); - void ClearRenderTargetColor(int index, uint componentMask, ColorF color); + void ClearRenderTargetColor(int index, int layer, uint componentMask, ColorF color); void ClearRenderTargetDepthStencil( + int layer, float depthValue, bool depthMask, int stencilValue, @@ -76,15 +78,13 @@ namespace Ryujinx.Graphics.GAL void SetRenderTargetColorMasks(ReadOnlySpan componentMask); void SetRenderTargets(ITexture[] colors, ITexture depthStencil); - void SetSampler(int binding, ISampler sampler); - - void SetScissor(int index, bool enable, int x, int y, int width, int height); + void SetScissors(ReadOnlySpan> regions); void SetStencilTest(StencilTestDescriptor stencilTest); void SetStorageBuffers(int first, ReadOnlySpan buffers); - void SetTexture(int binding, ITexture texture); + void SetTextureAndSampler(ShaderStage stage, int binding, ITexture texture, ISampler sampler); void SetTransformFeedbackBuffers(ReadOnlySpan buffers); void SetUniformBuffers(int first, ReadOnlySpan buffers); diff --git a/Ryujinx.Graphics.GAL/IRenderer.cs 
b/Ryujinx.Graphics.GAL/IRenderer.cs index b051e9dc8..8e48738db 100644 --- a/Ryujinx.Graphics.GAL/IRenderer.cs +++ b/Ryujinx.Graphics.GAL/IRenderer.cs @@ -30,6 +30,7 @@ namespace Ryujinx.Graphics.GAL ReadOnlySpan GetBufferData(BufferHandle buffer, int offset, int size); Capabilities GetCapabilities(); + HardwareInfo GetHardwareInfo(); IProgram LoadProgramBinary(byte[] programBinary, bool hasFragmentShader, ShaderInfo info); diff --git a/Ryujinx.Graphics.GAL/IShader.cs b/Ryujinx.Graphics.GAL/IShader.cs deleted file mode 100644 index be24adcda..000000000 --- a/Ryujinx.Graphics.GAL/IShader.cs +++ /dev/null @@ -1,6 +0,0 @@ -using System; - -namespace Ryujinx.Graphics.GAL -{ - public interface IShader : IDisposable { } -} diff --git a/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs b/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs index 442a90459..08c766df8 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs @@ -199,14 +199,12 @@ namespace Ryujinx.Graphics.GAL.Multithreading SetRenderTargetScaleCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.SetRenderTargets] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => SetRenderTargetsCommand.Run(ref GetCommand(memory), threaded, renderer); - _lookup[(int)CommandType.SetSampler] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => - SetSamplerCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.SetScissor] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => - SetScissorCommand.Run(ref GetCommand(memory), threaded, renderer); + SetScissorsCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.SetStencilTest] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => SetStencilTestCommand.Run(ref GetCommand(memory), threaded, renderer); - _lookup[(int)CommandType.SetTexture] = (Span memory, ThreadedRenderer 
threaded, IRenderer renderer) => - SetTextureCommand.Run(ref GetCommand(memory), threaded, renderer); + _lookup[(int)CommandType.SetTextureAndSampler] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => + SetTextureAndSamplerCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.SetUserClipDistance] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => SetUserClipDistanceCommand.Run(ref GetCommand(memory), threaded, renderer); _lookup[(int)CommandType.SetVertexAttribs] = (Span memory, ThreadedRenderer threaded, IRenderer renderer) => diff --git a/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs b/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs index 5c42abd12..69cda90c6 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs @@ -81,10 +81,9 @@ SetRenderTargetColorMasks, SetRenderTargetScale, SetRenderTargets, - SetSampler, SetScissor, SetStencilTest, - SetTexture, + SetTextureAndSampler, SetUserClipDistance, SetVertexAttribs, SetVertexBuffers, diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/ClearRenderTargetColorCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/ClearRenderTargetColorCommand.cs index 57509f1c0..cde69e7bb 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/Commands/ClearRenderTargetColorCommand.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/Commands/ClearRenderTargetColorCommand.cs @@ -4,19 +4,21 @@ { public CommandType CommandType => CommandType.ClearRenderTargetColor; private int _index; + private int _layer; private uint _componentMask; private ColorF _color; - public void Set(int index, uint componentMask, ColorF color) + public void Set(int index, int layer, uint componentMask, ColorF color) { _index = index; + _layer = layer; _componentMask = componentMask; _color = color; } public static void Run(ref ClearRenderTargetColorCommand command, ThreadedRenderer threaded, IRenderer renderer) { - 
renderer.Pipeline.ClearRenderTargetColor(command._index, command._componentMask, command._color); + renderer.Pipeline.ClearRenderTargetColor(command._index, command._layer, command._componentMask, command._color); } } } diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/ClearRenderTargetDepthStencilCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/ClearRenderTargetDepthStencilCommand.cs index 3692cd37b..c5c76539e 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/Commands/ClearRenderTargetDepthStencilCommand.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/Commands/ClearRenderTargetDepthStencilCommand.cs @@ -3,13 +3,15 @@ struct ClearRenderTargetDepthStencilCommand : IGALCommand { public CommandType CommandType => CommandType.ClearRenderTargetDepthStencil; + private int _layer; private float _depthValue; private bool _depthMask; private int _stencilValue; private int _stencilMask; - public void Set(float depthValue, bool depthMask, int stencilValue, int stencilMask) + public void Set(int layer, float depthValue, bool depthMask, int stencilValue, int stencilMask) { + _layer = layer; _depthValue = depthValue; _depthMask = depthMask; _stencilValue = stencilValue; @@ -18,7 +20,7 @@ public static void Run(ref ClearRenderTargetDepthStencilCommand command, ThreadedRenderer threaded, IRenderer renderer) { - renderer.Pipeline.ClearRenderTargetDepthStencil(command._depthValue, command._depthMask, command._stencilValue, command._stencilMask); + renderer.Pipeline.ClearRenderTargetDepthStencil(command._layer, command._depthValue, command._depthMask, command._stencilValue, command._stencilMask); } } } diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/SetSamplerCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/SetSamplerCommand.cs deleted file mode 100644 index f3be24dbf..000000000 --- a/Ryujinx.Graphics.GAL/Multithreading/Commands/SetSamplerCommand.cs +++ /dev/null @@ -1,23 +0,0 @@ -using Ryujinx.Graphics.GAL.Multithreading.Model; -using 
Ryujinx.Graphics.GAL.Multithreading.Resources; - -namespace Ryujinx.Graphics.GAL.Multithreading.Commands -{ - struct SetSamplerCommand : IGALCommand - { - public CommandType CommandType => CommandType.SetSampler; - private int _index; - private TableRef _sampler; - - public void Set(int index, TableRef sampler) - { - _index = index; - _sampler = sampler; - } - - public static void Run(ref SetSamplerCommand command, ThreadedRenderer threaded, IRenderer renderer) - { - renderer.Pipeline.SetSampler(command._index, command._sampler.GetAs(threaded)?.Base); - } - } -} diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/SetScissorCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/SetScissorCommand.cs deleted file mode 100644 index 6c95d0969..000000000 --- a/Ryujinx.Graphics.GAL/Multithreading/Commands/SetScissorCommand.cs +++ /dev/null @@ -1,28 +0,0 @@ -namespace Ryujinx.Graphics.GAL.Multithreading.Commands -{ - struct SetScissorCommand : IGALCommand - { - public CommandType CommandType => CommandType.SetScissor; - private int _index; - private bool _enable; - private int _x; - private int _y; - private int _width; - private int _height; - - public void Set(int index, bool enable, int x, int y, int width, int height) - { - _index = index; - _enable = enable; - _x = x; - _y = y; - _width = width; - _height = height; - } - - public static void Run(ref SetScissorCommand command, ThreadedRenderer threaded, IRenderer renderer) - { - renderer.Pipeline.SetScissor(command._index, command._enable, command._x, command._y, command._width, command._height); - } - } -} diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/SetScissorsCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/SetScissorsCommand.cs new file mode 100644 index 000000000..6966df6d5 --- /dev/null +++ b/Ryujinx.Graphics.GAL/Multithreading/Commands/SetScissorsCommand.cs @@ -0,0 +1,22 @@ +using Ryujinx.Graphics.GAL.Multithreading.Model; + +namespace Ryujinx.Graphics.GAL.Multithreading.Commands 
+{ + struct SetScissorsCommand : IGALCommand + { + public CommandType CommandType => CommandType.SetScissor; + private SpanRef> _scissors; + + public void Set(SpanRef> scissors) + { + _scissors = scissors; + } + + public static void Run(ref SetScissorsCommand command, ThreadedRenderer threaded, IRenderer renderer) + { + renderer.Pipeline.SetScissors(command._scissors.Get(threaded)); + + command._scissors.Dispose(threaded); + } + } +} diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/SetTextureAndSamplerCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/SetTextureAndSamplerCommand.cs new file mode 100644 index 000000000..7ef58c3d0 --- /dev/null +++ b/Ryujinx.Graphics.GAL/Multithreading/Commands/SetTextureAndSamplerCommand.cs @@ -0,0 +1,28 @@ +using Ryujinx.Graphics.GAL.Multithreading.Model; +using Ryujinx.Graphics.GAL.Multithreading.Resources; +using Ryujinx.Graphics.Shader; + +namespace Ryujinx.Graphics.GAL.Multithreading.Commands +{ + struct SetTextureAndSamplerCommand : IGALCommand + { + public CommandType CommandType => CommandType.SetTextureAndSampler; + private ShaderStage _stage; + private int _binding; + private TableRef _texture; + private TableRef _sampler; + + public void Set(ShaderStage stage, int binding, TableRef texture, TableRef sampler) + { + _stage = stage; + _binding = binding; + _texture = texture; + _sampler = sampler; + } + + public static void Run(ref SetTextureAndSamplerCommand command, ThreadedRenderer threaded, IRenderer renderer) + { + renderer.Pipeline.SetTextureAndSampler(command._stage, command._binding, command._texture.GetAs(threaded)?.Base, command._sampler.GetAs(threaded)?.Base); + } + } +} diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/SetTextureCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/SetTextureCommand.cs deleted file mode 100644 index e86f512be..000000000 --- a/Ryujinx.Graphics.GAL/Multithreading/Commands/SetTextureCommand.cs +++ /dev/null @@ -1,23 +0,0 @@ -using 
Ryujinx.Graphics.GAL.Multithreading.Model; -using Ryujinx.Graphics.GAL.Multithreading.Resources; - -namespace Ryujinx.Graphics.GAL.Multithreading.Commands -{ - struct SetTextureCommand : IGALCommand - { - public CommandType CommandType => CommandType.SetTexture; - private int _binding; - private TableRef _texture; - - public void Set(int binding, TableRef texture) - { - _binding = binding; - _texture = texture; - } - - public static void Run(ref SetTextureCommand command, ThreadedRenderer threaded, IRenderer renderer) - { - renderer.Pipeline.SetTexture(command._binding, command._texture.GetAs(threaded)?.Base); - } - } -} diff --git a/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs b/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs index 010ee7e65..9108de213 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/ThreadedPipeline.cs @@ -1,6 +1,7 @@ using Ryujinx.Graphics.GAL.Multithreading.Commands; using Ryujinx.Graphics.GAL.Multithreading.Model; using Ryujinx.Graphics.GAL.Multithreading.Resources; +using Ryujinx.Graphics.Shader; using System; using System.Linq; @@ -40,15 +41,15 @@ namespace Ryujinx.Graphics.GAL.Multithreading _renderer.QueueCommand(); } - public void ClearRenderTargetColor(int index, uint componentMask, ColorF color) + public void ClearRenderTargetColor(int index, int layer, uint componentMask, ColorF color) { - _renderer.New().Set(index, componentMask, color); + _renderer.New().Set(index, layer, componentMask, color); _renderer.QueueCommand(); } - public void ClearRenderTargetDepthStencil(float depthValue, bool depthMask, int stencilValue, int stencilMask) + public void ClearRenderTargetDepthStencil(int layer, float depthValue, bool depthMask, int stencilValue, int stencilMask) { - _renderer.New().Set(depthValue, depthMask, stencilValue, stencilMask); + _renderer.New().Set(layer, depthValue, depthMask, stencilValue, stencilMask); _renderer.QueueCommand(); } @@ -244,15 +245,9 @@ 
namespace Ryujinx.Graphics.GAL.Multithreading _renderer.QueueCommand(); } - public void SetSampler(int binding, ISampler sampler) + public void SetScissors(ReadOnlySpan> scissors) { - _renderer.New().Set(binding, Ref(sampler)); - _renderer.QueueCommand(); - } - - public void SetScissor(int index, bool enable, int x, int y, int width, int height) - { - _renderer.New().Set(index, enable, x, y, width, height); + _renderer.New().Set(_renderer.CopySpan(scissors)); _renderer.QueueCommand(); } @@ -268,9 +263,9 @@ namespace Ryujinx.Graphics.GAL.Multithreading _renderer.QueueCommand(); } - public void SetTexture(int binding, ITexture texture) + public void SetTextureAndSampler(ShaderStage stage, int binding, ITexture texture, ISampler sampler) { - _renderer.New().Set(binding, Ref(texture)); + _renderer.New().Set(stage, binding, Ref(texture), Ref(sampler)); _renderer.QueueCommand(); } diff --git a/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs b/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs index 63b668bac..f05f37c9f 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs @@ -76,7 +76,7 @@ namespace Ryujinx.Graphics.GAL.Multithreading renderer.ScreenCaptured += (object sender, ScreenCaptureImageInfo info) => ScreenCaptured?.Invoke(this, info); Pipeline = new ThreadedPipeline(this, renderer.Pipeline); - Window = new ThreadedWindow(this, renderer.Window); + Window = new ThreadedWindow(this, renderer); Buffers = new BufferMap(); Sync = new SyncMap(); Programs = new ProgramQueue(renderer); @@ -262,7 +262,9 @@ namespace Ryujinx.Graphics.GAL.Multithreading public IProgram CreateProgram(ShaderSource[] shaders, ShaderInfo info) { var program = new ThreadedProgram(this); + SourceProgramRequest request = new SourceProgramRequest(program, shaders, info); + Programs.Add(request); New().Set(Ref((IProgramRequest)request)); @@ -337,6 +339,11 @@ namespace Ryujinx.Graphics.GAL.Multithreading return 
box.Result; } + public HardwareInfo GetHardwareInfo() + { + return _baseRenderer.GetHardwareInfo(); + } + /// /// Initialize the base renderer. Must be called on the render thread. /// diff --git a/Ryujinx.Graphics.GAL/Multithreading/ThreadedWindow.cs b/Ryujinx.Graphics.GAL/Multithreading/ThreadedWindow.cs index dc0b4dc5e..c80455028 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/ThreadedWindow.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/ThreadedWindow.cs @@ -8,12 +8,12 @@ namespace Ryujinx.Graphics.GAL.Multithreading public class ThreadedWindow : IWindow { private ThreadedRenderer _renderer; - private IWindow _impl; + private IRenderer _impl; - public ThreadedWindow(ThreadedRenderer renderer, IWindow window) + public ThreadedWindow(ThreadedRenderer renderer, IRenderer impl) { _renderer = renderer; - _impl = window; + _impl = impl; } public void Present(ITexture texture, ImageCrop crop, Action swapBuffersCallback) @@ -28,7 +28,7 @@ namespace Ryujinx.Graphics.GAL.Multithreading public void SetSize(int width, int height) { - _impl.SetSize(width, height); + _impl.Window.SetSize(width, height); } } } diff --git a/Ryujinx.Graphics.GAL/ProgramPipelineState.cs b/Ryujinx.Graphics.GAL/ProgramPipelineState.cs new file mode 100644 index 000000000..10a4164e6 --- /dev/null +++ b/Ryujinx.Graphics.GAL/ProgramPipelineState.cs @@ -0,0 +1,78 @@ +using Ryujinx.Common.Memory; +using System; + +namespace Ryujinx.Graphics.GAL +{ + /// + /// Descriptor for a pipeline buffer binding. + /// + public struct BufferPipelineDescriptor + { + public bool Enable { get; } + public int Stride { get; } + public int Divisor { get; } + + public BufferPipelineDescriptor(bool enable, int stride, int divisor) + { + Enable = enable; + Stride = stride; + Divisor = divisor; + } + } + + /// + /// State required for a program to compile shaders. 
+ /// + public struct ProgramPipelineState + { + // Some state is considered always dynamic and should not be included: + // - Viewports/Scissors + // - Bias values (not enable) + + public int SamplesCount; + public Array8 AttachmentEnable; + public Array8 AttachmentFormats; + public bool DepthStencilEnable; + public Format DepthStencilFormat; + + public bool LogicOpEnable; + public LogicalOp LogicOp; + public Array8 BlendDescriptors; + public Array8 ColorWriteMask; + + public int VertexAttribCount; + public Array32 VertexAttribs; + + public int VertexBufferCount; + public Array32 VertexBuffers; + + // TODO: Min/max depth bounds. + public DepthTestDescriptor DepthTest; + public StencilTestDescriptor StencilTest; + public FrontFace FrontFace; + public Face CullMode; + public bool CullEnable; + + public PolygonModeMask BiasEnable; + + public float LineWidth; + // TODO: Polygon mode. + public bool DepthClampEnable; + public bool RasterizerDiscard; + public PrimitiveTopology Topology; + public bool PrimitiveRestartEnable; + public uint PatchControlPoints; + + public void SetVertexAttribs(ReadOnlySpan vertexAttribs) + { + VertexAttribCount = vertexAttribs.Length; + vertexAttribs.CopyTo(VertexAttribs.ToSpan()); + } + + public void SetLogicOpState(bool enable, LogicalOp op) + { + LogicOp = op; + LogicOpEnable = enable; + } + } +} diff --git a/Ryujinx.Graphics.GAL/Rectangle.cs b/Ryujinx.Graphics.GAL/Rectangle.cs new file mode 100644 index 000000000..375472da3 --- /dev/null +++ b/Ryujinx.Graphics.GAL/Rectangle.cs @@ -0,0 +1,18 @@ +namespace Ryujinx.Graphics.GAL +{ + public struct Rectangle where T : unmanaged + { + public T X { get; } + public T Y { get; } + public T Width { get; } + public T Height { get; } + + public Rectangle(T x, T y, T width, T height) + { + X = x; + Y = y; + Width = width; + Height = height; + } + } +} diff --git a/Ryujinx.Graphics.GAL/ShaderBindings.cs b/Ryujinx.Graphics.GAL/ShaderBindings.cs new file mode 100644 index 000000000..ea8e17491 --- 
/dev/null +++ b/Ryujinx.Graphics.GAL/ShaderBindings.cs @@ -0,0 +1,24 @@ +using System.Collections.Generic; + +namespace Ryujinx.Graphics.GAL +{ + public struct ShaderBindings + { + public IReadOnlyCollection UniformBufferBindings { get; } + public IReadOnlyCollection StorageBufferBindings { get; } + public IReadOnlyCollection TextureBindings { get; } + public IReadOnlyCollection ImageBindings { get; } + + public ShaderBindings( + IReadOnlyCollection uniformBufferBindings, + IReadOnlyCollection storageBufferBindings, + IReadOnlyCollection textureBindings, + IReadOnlyCollection imageBindings) + { + UniformBufferBindings = uniformBufferBindings; + StorageBufferBindings = storageBufferBindings; + TextureBindings = textureBindings; + ImageBindings = imageBindings; + } + } +} diff --git a/Ryujinx.Graphics.GAL/ShaderInfo.cs b/Ryujinx.Graphics.GAL/ShaderInfo.cs index 0c187e066..b4c871178 100644 --- a/Ryujinx.Graphics.GAL/ShaderInfo.cs +++ b/Ryujinx.Graphics.GAL/ShaderInfo.cs @@ -3,10 +3,21 @@ namespace Ryujinx.Graphics.GAL public struct ShaderInfo { public int FragmentOutputMap { get; } + public ProgramPipelineState? 
State { get; } + public bool FromCache { get; set; } - public ShaderInfo(int fragmentOutputMap) + public ShaderInfo(int fragmentOutputMap, ProgramPipelineState state, bool fromCache = false) { FragmentOutputMap = fragmentOutputMap; + State = state; + FromCache = fromCache; + } + + public ShaderInfo(int fragmentOutputMap, bool fromCache = false) + { + FragmentOutputMap = fragmentOutputMap; + State = null; + FromCache = fromCache; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.GAL/ShaderSource.cs b/Ryujinx.Graphics.GAL/ShaderSource.cs index 13b92f20a..c68ba80d6 100644 --- a/Ryujinx.Graphics.GAL/ShaderSource.cs +++ b/Ryujinx.Graphics.GAL/ShaderSource.cs @@ -7,22 +7,24 @@ namespace Ryujinx.Graphics.GAL { public string Code { get; } public byte[] BinaryCode { get; } + public ShaderBindings Bindings { get; } public ShaderStage Stage { get; } public TargetLanguage Language { get; } - public ShaderSource(string code, byte[] binaryCode, ShaderStage stage, TargetLanguage language) + public ShaderSource(string code, byte[] binaryCode, ShaderBindings bindings, ShaderStage stage, TargetLanguage language) { Code = code; BinaryCode = binaryCode; + Bindings = bindings; Stage = stage; Language = language; } - public ShaderSource(string code, ShaderStage stage, TargetLanguage language) : this(code, null, stage, language) + public ShaderSource(string code, ShaderBindings bindings, ShaderStage stage, TargetLanguage language) : this(code, null, bindings, stage, language) { } - public ShaderSource(byte[] binaryCode, ShaderStage stage, TargetLanguage language) : this(null, binaryCode, stage, language) + public ShaderSource(byte[] binaryCode, ShaderBindings bindings, ShaderStage stage, TargetLanguage language) : this(null, binaryCode, bindings, stage, language) { } } diff --git a/Ryujinx.Graphics.GAL/Viewport.cs b/Ryujinx.Graphics.GAL/Viewport.cs index d9d6e20a4..58135db2c 100644 --- a/Ryujinx.Graphics.GAL/Viewport.cs +++ b/Ryujinx.Graphics.GAL/Viewport.cs @@ -2,7 +2,7 @@ 
namespace Ryujinx.Graphics.GAL { public struct Viewport { - public RectangleF Region { get; } + public Rectangle Region { get; } public ViewportSwizzle SwizzleX { get; } public ViewportSwizzle SwizzleY { get; } @@ -13,13 +13,13 @@ namespace Ryujinx.Graphics.GAL public float DepthFar { get; } public Viewport( - RectangleF region, - ViewportSwizzle swizzleX, - ViewportSwizzle swizzleY, - ViewportSwizzle swizzleZ, - ViewportSwizzle swizzleW, - float depthNear, - float depthFar) + Rectangle region, + ViewportSwizzle swizzleX, + ViewportSwizzle swizzleY, + ViewportSwizzle swizzleZ, + ViewportSwizzle swizzleW, + float depthNear, + float depthFar) { Region = region; SwizzleX = swizzleX; diff --git a/Ryujinx.Graphics.Gpu/Constants.cs b/Ryujinx.Graphics.Gpu/Constants.cs index 026d12a92..1738fddf7 100644 --- a/Ryujinx.Graphics.Gpu/Constants.cs +++ b/Ryujinx.Graphics.Gpu/Constants.cs @@ -40,6 +40,22 @@ namespace Ryujinx.Graphics.Gpu /// public const int TotalTransformFeedbackBuffers = 4; + /// + /// Maximum number of textures on a single shader stage. + /// + /// + /// The maximum number of textures is API limited, the hardware supports an unlimited amount. + /// + public const int TotalTextures = 32; + + /// + /// Maximum number of images on a single shader stage. + /// + /// + /// The maximum number of images is API limited, the hardware supports an unlimited amount. + /// + public const int TotalImages = 8; + /// /// Maximum number of render target color buffers. /// @@ -53,7 +69,7 @@ namespace Ryujinx.Graphics.Gpu /// /// Maximum number of vertex attributes. /// - public const int TotalVertexAttribs = 16; + public const int TotalVertexAttribs = 16; // FIXME: Should be 32, but OpenGL only supports 16. /// /// Maximum number of vertex buffers. 
diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs index 87c14da8f..a1a9b481f 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs @@ -188,6 +188,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute _channel.BufferManager.SetComputeStorageBufferBindings(info.SBuffers); _channel.BufferManager.SetComputeUniformBufferBindings(info.CBuffers); + int maxTextureBinding = -1; + int maxImageBinding = -1; + TextureBindingInfo[] textureBindings = _channel.TextureManager.RentComputeTextureBindings(info.Textures.Count); for (int index = 0; index < info.Textures.Count; index++) @@ -202,6 +205,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Flags); + + if (descriptor.Binding > maxTextureBinding) + { + maxTextureBinding = descriptor.Binding; + } } TextureBindingInfo[] imageBindings = _channel.TextureManager.RentComputeImageBindings(info.Images.Count); @@ -220,9 +228,18 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Flags); + + if (descriptor.Binding > maxImageBinding) + { + maxImageBinding = descriptor.Binding; + } } - _channel.TextureManager.CommitComputeBindings(); + _channel.TextureManager.SetComputeMaxBindings(maxTextureBinding, maxImageBinding); + + // Should never return false for mismatching spec state, since the shader was fetched above. 
+ _channel.TextureManager.CommitComputeBindings(cs.SpecializationState); + _channel.BufferManager.CommitComputeBindings(); _context.Renderer.Pipeline.DispatchCompute(qmd.CtaRasterWidth, qmd.CtaRasterHeight, qmd.CtaRasterDepth); diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs index ab3713141..6dc5dca5b 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs @@ -505,8 +505,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed } int index = (argument >> 6) & 0xf; + int layer = (argument >> 10) & 0x3ff; - engine.UpdateRenderTargetState(useControl: false, singleUse: index); + engine.UpdateRenderTargetState(useControl: false, layered: layer != 0, singleUse: index); // If there is a mismatch on the host clip region and the one explicitly defined by the guest // on the screen scissor state, then we need to force only one texture to be bound to avoid @@ -558,7 +559,10 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed scissorH = (int)MathF.Ceiling(scissorH * scale); } - _context.Renderer.Pipeline.SetScissor(0, true, scissorX, scissorY, scissorW, scissorH); + Span> scissors = stackalloc Rectangle[1]; + scissors[0] = new Rectangle(scissorX, scissorY, scissorW, scissorH); + + _context.Renderer.Pipeline.SetScissors(scissors); } if (clipMismatch) @@ -581,7 +585,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed ColorF color = new ColorF(clearColor.Red, clearColor.Green, clearColor.Blue, clearColor.Alpha); - _context.Renderer.Pipeline.ClearRenderTargetColor(index, componentMask, color); + _context.Renderer.Pipeline.ClearRenderTargetColor(index, layer, componentMask, color); } if (clearDepth || clearStencil) @@ -602,6 +606,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed } _context.Renderer.Pipeline.ClearRenderTargetDepthStencil( + layer, depthValue, clearDepth, stencilValue, diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs 
b/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs index d0c3bc5ae..19a80cb37 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs @@ -1,4 +1,5 @@ using Ryujinx.Common.Logging; +using Ryujinx.Common.Memory; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Engine.Types; using Ryujinx.Graphics.Gpu.Image; @@ -15,11 +16,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// class StateUpdater { - public const int ShaderStateIndex = 0; - public const int RasterizerStateIndex = 1; - public const int ScissorStateIndex = 2; - public const int VertexBufferStateIndex = 3; - public const int PrimitiveRestartStateIndex = 4; + public const int ShaderStateIndex = 16; + public const int RasterizerStateIndex = 15; + public const int ScissorStateIndex = 18; + public const int VertexBufferStateIndex = 0; + public const int PrimitiveRestartStateIndex = 12; private readonly GpuContext _context; private readonly GpuChannel _channel; @@ -31,6 +32,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed private readonly ShaderProgramInfo[] _currentProgramInfo; private ShaderSpecializationState _shaderSpecState; + private ProgramPipelineState _pipeline; + private bool _vtgWritesRtLayer; private byte _vsClipDistancesWritten; @@ -54,7 +57,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed _drawState = drawState; _currentProgramInfo = new ShaderProgramInfo[Constants.ShaderStages]; - // ShaderState must be the first, as other state updates depends on information from the currently bound shader. + // ShaderState must be updated after other state updates, as pipeline state is sent to the backend when compiling new shaders. + // Render target state must appear after shader state as it depends on information from the currently bound shader. // Rasterizer and scissor states are checked by render target clear, their indexes // must be updated on the constants "RasterizerStateIndex" and "ScissorStateIndex" if modified. 
// The vertex buffer state may be forced dirty when a indexed draw starts, the "VertexBufferStateIndex" @@ -62,53 +66,39 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed // The order of the other state updates doesn't matter. _updateTracker = new StateUpdateTracker(new[] { - new StateUpdateCallbackEntry(UpdateShaderState, - nameof(ThreedClassState.ShaderBaseAddress), - nameof(ThreedClassState.ShaderState)), - - new StateUpdateCallbackEntry(UpdateRasterizerState, nameof(ThreedClassState.RasterizeEnable)), - - new StateUpdateCallbackEntry(UpdateScissorState, - nameof(ThreedClassState.ScissorState), - nameof(ThreedClassState.ScreenScissorState)), - new StateUpdateCallbackEntry(UpdateVertexBufferState, nameof(ThreedClassState.VertexBufferDrawState), nameof(ThreedClassState.VertexBufferInstanced), nameof(ThreedClassState.VertexBufferState), nameof(ThreedClassState.VertexBufferEndAddress)), - new StateUpdateCallbackEntry(UpdatePrimitiveRestartState, - nameof(ThreedClassState.PrimitiveRestartDrawArrays), - nameof(ThreedClassState.PrimitiveRestartState)), + new StateUpdateCallbackEntry(UpdateVertexAttribState, nameof(ThreedClassState.VertexAttribState)), - new StateUpdateCallbackEntry(UpdateTessellationState, - nameof(ThreedClassState.TessOuterLevel), - nameof(ThreedClassState.TessInnerLevel), - nameof(ThreedClassState.PatchVertices)), + new StateUpdateCallbackEntry(UpdateBlendState, + nameof(ThreedClassState.BlendIndependent), + nameof(ThreedClassState.BlendConstant), + nameof(ThreedClassState.BlendStateCommon), + nameof(ThreedClassState.BlendEnableCommon), + nameof(ThreedClassState.BlendEnable), + nameof(ThreedClassState.BlendState)), - new StateUpdateCallbackEntry(UpdateTfBufferState, nameof(ThreedClassState.TfBufferState)), - new StateUpdateCallbackEntry(UpdateUserClipState, nameof(ThreedClassState.ClipDistanceEnable)), + new StateUpdateCallbackEntry(UpdateFaceState, nameof(ThreedClassState.FaceState)), - new StateUpdateCallbackEntry(UpdateRenderTargetState, - 
nameof(ThreedClassState.RtColorState), - nameof(ThreedClassState.RtDepthStencilState), - nameof(ThreedClassState.RtControl), - nameof(ThreedClassState.RtDepthStencilSize), - nameof(ThreedClassState.RtDepthStencilEnable)), - - new StateUpdateCallbackEntry(UpdateDepthClampState, nameof(ThreedClassState.ViewVolumeClipControl)), - - new StateUpdateCallbackEntry(UpdateAlphaTestState, - nameof(ThreedClassState.AlphaTestEnable), - nameof(ThreedClassState.AlphaTestRef), - nameof(ThreedClassState.AlphaTestFunc)), + new StateUpdateCallbackEntry(UpdateStencilTestState, + nameof(ThreedClassState.StencilBackMasks), + nameof(ThreedClassState.StencilTestState), + nameof(ThreedClassState.StencilBackTestState)), new StateUpdateCallbackEntry(UpdateDepthTestState, nameof(ThreedClassState.DepthTestEnable), nameof(ThreedClassState.DepthWriteEnable), nameof(ThreedClassState.DepthTestFunc)), + new StateUpdateCallbackEntry(UpdateTessellationState, + nameof(ThreedClassState.TessOuterLevel), + nameof(ThreedClassState.TessInnerLevel), + nameof(ThreedClassState.PatchVertices)), + new StateUpdateCallbackEntry(UpdateViewportTransform, nameof(ThreedClassState.DepthMode), nameof(ThreedClassState.ViewportTransform), @@ -116,6 +106,10 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed nameof(ThreedClassState.YControl), nameof(ThreedClassState.ViewportTransformEnable)), + new StateUpdateCallbackEntry(UpdateLogicOpState, nameof(ThreedClassState.LogicOpState)), + + new StateUpdateCallbackEntry(UpdateDepthClampState, nameof(ThreedClassState.ViewVolumeClipControl)), + new StateUpdateCallbackEntry(UpdatePolygonMode, nameof(ThreedClassState.PolygonModeFront), nameof(ThreedClassState.PolygonModeBack)), @@ -126,21 +120,46 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed nameof(ThreedClassState.DepthBiasUnits), nameof(ThreedClassState.DepthBiasClamp)), - new StateUpdateCallbackEntry(UpdateStencilTestState, - nameof(ThreedClassState.StencilBackMasks), - nameof(ThreedClassState.StencilTestState), - 
nameof(ThreedClassState.StencilBackTestState)), + new StateUpdateCallbackEntry(UpdatePrimitiveRestartState, nameof(ThreedClassState.PrimitiveRestartState)), + + new StateUpdateCallbackEntry(UpdateLineState, + nameof(ThreedClassState.LineWidthSmooth), + nameof(ThreedClassState.LineSmoothEnable)), + + new StateUpdateCallbackEntry(UpdateRtColorMask, + nameof(ThreedClassState.RtColorMaskShared), + nameof(ThreedClassState.RtColorMask)), + + new StateUpdateCallbackEntry(UpdateRasterizerState, nameof(ThreedClassState.RasterizeEnable)), + + new StateUpdateCallbackEntry(UpdateShaderState, + nameof(ThreedClassState.ShaderBaseAddress), + nameof(ThreedClassState.ShaderState)), + + new StateUpdateCallbackEntry(UpdateRenderTargetState, + nameof(ThreedClassState.RtColorState), + nameof(ThreedClassState.RtDepthStencilState), + nameof(ThreedClassState.RtControl), + nameof(ThreedClassState.RtDepthStencilSize), + nameof(ThreedClassState.RtDepthStencilEnable)), + + new StateUpdateCallbackEntry(UpdateScissorState, + nameof(ThreedClassState.ScissorState), + nameof(ThreedClassState.ScreenScissorState)), + + new StateUpdateCallbackEntry(UpdateTfBufferState, nameof(ThreedClassState.TfBufferState)), + new StateUpdateCallbackEntry(UpdateUserClipState, nameof(ThreedClassState.ClipDistanceEnable)), + + new StateUpdateCallbackEntry(UpdateAlphaTestState, + nameof(ThreedClassState.AlphaTestEnable), + nameof(ThreedClassState.AlphaTestRef), + nameof(ThreedClassState.AlphaTestFunc)), new StateUpdateCallbackEntry(UpdateSamplerPoolState, nameof(ThreedClassState.SamplerPoolState), nameof(ThreedClassState.SamplerIndex)), new StateUpdateCallbackEntry(UpdateTexturePoolState, nameof(ThreedClassState.TexturePoolState)), - new StateUpdateCallbackEntry(UpdateVertexAttribState, nameof(ThreedClassState.VertexAttribState)), - - new StateUpdateCallbackEntry(UpdateLineState, - nameof(ThreedClassState.LineWidthSmooth), - nameof(ThreedClassState.LineSmoothEnable)), new StateUpdateCallbackEntry(UpdatePointState, 
nameof(ThreedClassState.PointSize), @@ -151,22 +170,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed new StateUpdateCallbackEntry(UpdateIndexBufferState, nameof(ThreedClassState.IndexBufferState), nameof(ThreedClassState.IndexBufferCount)), - - new StateUpdateCallbackEntry(UpdateFaceState, nameof(ThreedClassState.FaceState)), - - new StateUpdateCallbackEntry(UpdateRtColorMask, - nameof(ThreedClassState.RtColorMaskShared), - nameof(ThreedClassState.RtColorMask)), - - new StateUpdateCallbackEntry(UpdateBlendState, - nameof(ThreedClassState.BlendIndependent), - nameof(ThreedClassState.BlendConstant), - nameof(ThreedClassState.BlendStateCommon), - nameof(ThreedClassState.BlendEnableCommon), - nameof(ThreedClassState.BlendEnable), - nameof(ThreedClassState.BlendState)), - - new StateUpdateCallbackEntry(UpdateLogicOpState, nameof(ThreedClassState.LogicOpState)) }); } @@ -201,7 +204,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed // of the shader for the new state. if (_shaderSpecState != null) { - if (!_shaderSpecState.MatchesGraphics(_channel, GetPoolState(), GetGraphicsState())) + if (!_shaderSpecState.MatchesGraphics(_channel, GetPoolState(), GetGraphicsState(), false)) { ForceShaderUpdate(); } @@ -275,7 +278,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed { UpdateStorageBuffers(); - _channel.TextureManager.CommitGraphicsBindings(); + if (!_channel.TextureManager.CommitGraphicsBindings(_shaderSpecState)) + { + // Shader must be reloaded. 
+ UpdateShaderState(); + } + _channel.BufferManager.CommitGraphicsBindings(); } @@ -315,6 +323,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// private void UpdateTessellationState() { + _pipeline.PatchControlPoints = (uint)_state.State.PatchVertices; + _context.Renderer.Pipeline.SetPatchParameters( _state.State.PatchVertices, _state.State.TessOuterLevel.ToSpan(), @@ -347,6 +357,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed private void UpdateRasterizerState() { bool enable = _state.State.RasterizeEnable; + _pipeline.RasterizerDiscard = !enable; _context.Renderer.Pipeline.SetRasterizerDiscard(!enable); } @@ -362,8 +373,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// Updates render targets (color and depth-stencil buffers) based on current render target state. /// /// Use draw buffers information from render target control register + /// Indicates if the texture is layered /// If this is not -1, it indicates that only the given indexed target will be used. - public void UpdateRenderTargetState(bool useControl, int singleUse = -1) + public void UpdateRenderTargetState(bool useControl, bool layered = false, int singleUse = -1) { var memoryManager = _channel.MemoryManager; var rtControl = _state.State.RtControl; @@ -399,7 +411,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed Image.Texture color = memoryManager.Physical.TextureCache.FindOrCreateTexture( memoryManager, colorState, - _vtgWritesRtLayer, + _vtgWritesRtLayer || layered, samplesInX, samplesInY, sizeHint); @@ -433,6 +445,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed memoryManager, dsState, dsSize, + _vtgWritesRtLayer || layered, samplesInX, samplesInY, sizeHint); @@ -486,11 +499,21 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// public void UpdateScissorState() { + const int MinX = 0; + const int MinY = 0; + const int MaxW = 0xffff; + const int MaxH = 0xffff; + + Span> regions = stackalloc Rectangle[Constants.TotalViewports]; + for (int index = 0; index < Constants.TotalViewports; 
index++) { ScissorState scissor = _state.State.ScissorState[index]; - bool enable = scissor.Enable && (scissor.X1 != 0 || scissor.Y1 != 0 || scissor.X2 != 0xffff || scissor.Y2 != 0xffff); + bool enable = scissor.Enable && (scissor.X1 != MinX || + scissor.Y1 != MinY || + scissor.X2 != MaxW || + scissor.Y2 != MaxH); if (enable) { @@ -520,13 +543,15 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed height = (int)MathF.Ceiling(height * scale); } - _context.Renderer.Pipeline.SetScissor(index, true, x, y, width, height); + regions[index] = new Rectangle(x, y, width, height); } else { - _context.Renderer.Pipeline.SetScissor(index, false, 0, 0, 0, 0); + regions[index] = new Rectangle(MinX, MinY, MaxW, MaxH); } } + + _context.Renderer.Pipeline.SetScissors(regions); } /// @@ -536,7 +561,10 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed private void UpdateDepthClampState() { ViewVolumeClipControl clip = _state.State.ViewVolumeClipControl; - _context.Renderer.Pipeline.SetDepthClamp((clip & ViewVolumeClipControl.DepthClampDisabled) == 0); + bool clamp = (clip & ViewVolumeClipControl.DepthClampDisabled) == 0; + + _pipeline.DepthClampEnable = clamp; + _context.Renderer.Pipeline.SetDepthClamp(clamp); } /// @@ -555,10 +583,13 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// private void UpdateDepthTestState() { - _context.Renderer.Pipeline.SetDepthTest(new DepthTestDescriptor( + DepthTestDescriptor descriptor = new DepthTestDescriptor( _state.State.DepthTestEnable, _state.State.DepthWriteEnable, - _state.State.DepthTestFunc)); + _state.State.DepthTestFunc); + + _pipeline.DepthTest = descriptor; + _context.Renderer.Pipeline.SetDepthTest(descriptor); } /// @@ -585,7 +616,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed ref var scissor = ref _state.State.ScreenScissorState; float rScale = _channel.TextureManager.RenderTargetScale; - var scissorRect = new RectangleF(0, 0, (scissor.X + scissor.Width) * rScale, (scissor.Y + scissor.Height) * rScale); + var scissorRect = new 
Rectangle(0, 0, (scissor.X + scissor.Width) * rScale, (scissor.Y + scissor.Height) * rScale); viewports[index] = new Viewport(scissorRect, ViewportSwizzle.PositiveX, ViewportSwizzle.PositiveY, ViewportSwizzle.PositiveZ, ViewportSwizzle.PositiveW, 0, 1); continue; @@ -622,7 +653,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed height *= scale; } - RectangleF region = new RectangleF(x, y, width, height); + Rectangle region = new Rectangle(x, y, width, height); ViewportSwizzle swizzleX = transform.UnpackSwizzleX(); ViewportSwizzle swizzleY = transform.UnpackSwizzleY(); @@ -642,6 +673,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed viewports[index] = new Viewport(region, swizzleX, swizzleY, swizzleZ, swizzleW, depthNear, depthFar); } + _context.Renderer.Pipeline.SetDepthMode(GetDepthMode()); _context.Renderer.Pipeline.SetViewports(0, viewports, disableTransform); } @@ -650,37 +682,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// private void UpdateDepthMode() { - ref var transform = ref _state.State.ViewportTransform[0]; - ref var extents = ref _state.State.ViewportExtents[0]; - - DepthMode depthMode; - - if (!float.IsInfinity(extents.DepthNear) && - !float.IsInfinity(extents.DepthFar) && - (extents.DepthFar - extents.DepthNear) != 0) - { - // Try to guess the depth mode being used on the high level API - // based on current transform. - // It is setup like so by said APIs: - // If depth mode is ZeroToOne: - // TranslateZ = Near - // ScaleZ = Far - Near - // If depth mode is MinusOneToOne: - // TranslateZ = (Near + Far) / 2 - // ScaleZ = (Far - Near) / 2 - // DepthNear/Far are sorted such as that Near is always less than Far. - depthMode = extents.DepthNear != transform.TranslateZ && - extents.DepthFar != transform.TranslateZ - ? DepthMode.MinusOneToOne - : DepthMode.ZeroToOne; - } - else - { - // If we can't guess from the viewport transform, then just use the depth mode register. 
- depthMode = (DepthMode)(_state.State.DepthMode & 1); - } - - _context.Renderer.Pipeline.SetDepthMode(depthMode); + _context.Renderer.Pipeline.SetDepthMode(GetDepthMode()); } /// @@ -708,6 +710,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed enables |= (depthBias.LineEnable ? PolygonModeMask.Line : 0); enables |= (depthBias.FillEnable ? PolygonModeMask.Fill : 0); + _pipeline.BiasEnable = enables; _context.Renderer.Pipeline.SetDepthBias(enables, factor, units / 2f, clamp); } @@ -749,7 +752,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed backMask = test.FrontMask; } - _context.Renderer.Pipeline.SetStencilTest(new StencilTestDescriptor( + StencilTestDescriptor descriptor = new StencilTestDescriptor( test.Enable, test.FrontFunc, test.FrontSFail, @@ -764,7 +767,10 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed backDpFail, backFuncRef, backFuncMask, - backMask)); + backMask); + + _pipeline.StencilTest = descriptor; + _context.Renderer.Pipeline.SetStencilTest(descriptor); } /// @@ -833,6 +839,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed format); } + _pipeline.SetVertexAttribs(vertexAttribs); _context.Renderer.Pipeline.SetVertexAttribs(vertexAttribs); } @@ -844,6 +851,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed float width = _state.State.LineWidthSmooth; bool smooth = _state.State.LineSmoothEnable; + _pipeline.LineWidth = width; _context.Renderer.Pipeline.SetLineParameters(width, smooth); } @@ -870,6 +878,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed PrimitiveRestartState primitiveRestart = _state.State.PrimitiveRestartState; bool enable = primitiveRestart.Enable && (_drawState.DrawIndexed || _state.State.PrimitiveRestartDrawArrays); + _pipeline.PrimitiveRestartEnable = enable; _context.Renderer.Pipeline.SetPrimitiveRestart(enable, primitiveRestart.Index); } @@ -916,6 +925,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed if (!vertexBuffer.UnpackEnable()) { + _pipeline.VertexBuffers[index] = new BufferPipelineDescriptor(false, 0, 0); 
_channel.BufferManager.SetVertexBuffer(index, 0, 0, 0, 0); continue; @@ -933,6 +943,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed _drawState.IsAnyVbInstanced |= divisor != 0; + ulong vbSize = endAddress.Pack() - address + 1; ulong size; if (_drawState.IbStreamer.HasInlineIndexData || _drawState.DrawIndexed || stride == 0 || instanced) @@ -940,7 +951,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed // This size may be (much) larger than the real vertex buffer size. // Avoid calculating it this way, unless we don't have any other option. - size = endAddress.Pack() - address + 1; + size = vbSize; if (stride > 0 && indexTypeSmall && _drawState.DrawIndexed && !instanced) { @@ -964,9 +975,10 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed var drawState = _state.State.VertexBufferDrawState; - size = (ulong)((firstInstance + drawState.First + drawState.Count) * stride); + size = Math.Min(vbSize, (ulong)((firstInstance + drawState.First + drawState.Count) * stride)); } + _pipeline.VertexBuffers[index] = new BufferPipelineDescriptor(_channel.MemoryManager.IsMapped(address), stride, divisor); _channel.BufferManager.SetVertexBuffer(index, address, size, stride, divisor); } } @@ -979,6 +991,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed var yControl = _state.State.YControl; var face = _state.State.FaceState; + _pipeline.CullEnable = face.CullEnable; + _pipeline.CullMode = face.CullFace; _context.Renderer.Pipeline.SetFaceCulling(face.CullEnable, face.CullFace); UpdateFrontFace(yControl, face.FrontFace); @@ -998,6 +1012,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed frontFace = frontFace == FrontFace.CounterClockwise ? FrontFace.Clockwise : FrontFace.CounterClockwise; } + _pipeline.FrontFace = frontFace; _context.Renderer.Pipeline.SetFrontFace(frontFace); } @@ -1023,6 +1038,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed componentMask |= (colorMask.UnpackAlpha() ? 
8u : 0u); componentMasks[index] = componentMask; + _pipeline.ColorWriteMask[index] = componentMask; } _context.Renderer.Pipeline.SetRenderTargetColorMasks(componentMasks); @@ -1071,6 +1087,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed blend.AlphaDstFactor); } + _pipeline.BlendDescriptors[index] = descriptor; _context.Renderer.Pipeline.SetBlendState(index, descriptor); } } @@ -1082,6 +1099,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed { LogicalOpState logicOpState = _state.State.LogicOpState; + _pipeline.SetLogicOpState(logicOpState.Enable, logicOpState.LogicalOp); _context.Renderer.Pipeline.SetLogicOpState(logicOpState.Enable, logicOpState.LogicalOp); } @@ -1113,7 +1131,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed GpuChannelPoolState poolState = GetPoolState(); GpuChannelGraphicsState graphicsState = GetGraphicsState(); - CachedShaderProgram gs = shaderCache.GetGraphicsShader(ref _state.State, _channel, poolState, graphicsState, addresses); + CachedShaderProgram gs = shaderCache.GetGraphicsShader(ref _state.State, ref _pipeline, _channel, poolState, graphicsState, addresses); _shaderSpecState = gs.SpecializationState; @@ -1148,6 +1166,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed return; } + int maxTextureBinding = -1; + int maxImageBinding = -1; + Span textureBindings = _channel.TextureManager.RentGraphicsTextureBindings(stage, info.Textures.Count); if (info.UsesRtLayer) @@ -1167,6 +1188,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Flags); + + if (descriptor.Binding > maxTextureBinding) + { + maxTextureBinding = descriptor.Binding; + } } TextureBindingInfo[] imageBindings = _channel.TextureManager.RentGraphicsImageBindings(stage, info.Images.Count); @@ -1185,8 +1211,15 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Flags); + + if (descriptor.Binding > maxImageBinding) + { + maxImageBinding = descriptor.Binding; + } } + 
_channel.TextureManager.SetGraphicsMaxBindings(maxTextureBinding, maxImageBinding); + _channel.BufferManager.SetGraphicsStorageBufferBindings(stage, info.SBuffers); _channel.BufferManager.SetGraphicsUniformBufferBindings(stage, info.CBuffers); } @@ -1205,11 +1238,67 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// Current GPU channel state private GpuChannelGraphicsState GetGraphicsState() { + ref var vertexAttribState = ref _state.State.VertexAttribState; + + Array32 attributeTypes = new Array32(); + + for (int location = 0; location < attributeTypes.Length; location++) + { + attributeTypes[location] = vertexAttribState[location].UnpackType() switch + { + 3 => AttributeType.Sint, + 4 => AttributeType.Uint, + _ => AttributeType.Float + }; + } + return new GpuChannelGraphicsState( _state.State.EarlyZForce, _drawState.Topology, _state.State.TessMode, - _state.State.ViewportTransformEnable == 0); + _state.State.ViewportTransformEnable == 0, + GetDepthMode() == DepthMode.MinusOneToOne, + _state.State.VertexProgramPointSize, + _state.State.PointSize, + _state.State.AlphaTestEnable, + _state.State.AlphaTestFunc, + _state.State.AlphaTestRef, + ref attributeTypes); + } + + private DepthMode GetDepthMode() + { + ref var transform = ref _state.State.ViewportTransform[0]; + ref var extents = ref _state.State.ViewportExtents[0]; + + DepthMode depthMode; + + if (!float.IsInfinity(extents.DepthNear) && + !float.IsInfinity(extents.DepthFar) && + (extents.DepthFar - extents.DepthNear) != 0) + { + // Try to guess the depth mode being used on the high level API + // based on current transform. + // It is setup like so by said APIs: + // If depth mode is ZeroToOne: + // TranslateZ = Near + // ScaleZ = Far - Near + // If depth mode is MinusOneToOne: + // TranslateZ = (Near + Far) / 2 + // ScaleZ = (Far - Near) / 2 + // DepthNear/Far are sorted such as that Near is always less than Far. 
+ depthMode = extents.DepthNear != transform.TranslateZ && + extents.DepthFar != transform.TranslateZ + ? DepthMode.MinusOneToOne + : DepthMode.ZeroToOne; + } + else + { + // If we can't guess from the viewport transform, then just use the depth mode register. + depthMode = (DepthMode)(_state.State.DepthMode & 1); + } + + return depthMode; } /// diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs index a2e8c64c1..764ba2394 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs @@ -131,10 +131,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// Updates render targets (color and depth-stencil buffers) based on current render target state. /// /// Use draw buffers information from render target control register + /// Indicates if the texture is layered /// If this is not -1, it indicates that only the given indexed target will be used. - public void UpdateRenderTargetState(bool useControl, int singleUse = -1) + public void UpdateRenderTargetState(bool useControl, bool layered = false, int singleUse = -1) { - _stateUpdater.UpdateRenderTargetState(useControl, singleUse); + _stateUpdater.UpdateRenderTargetState(useControl, layered, singleUse); } /// diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClassState.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClassState.cs index 81a228315..26260ce59 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClassState.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClassState.cs @@ -311,6 +311,15 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed { return Attribute & 0x3fe00000; } + + /// + /// Unpacks the Maxwell attribute component type. 
+ /// + /// Attribute component type + public uint UnpackType() + { + return (Attribute >> 27) & 7; + } } /// @@ -759,8 +768,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed public fixed uint Reserved10B0[18]; public uint ClearFlags; public fixed uint Reserved10FC[25]; - public Array16 VertexAttribState; - public fixed uint Reserved11A0[31]; + public Array32 VertexAttribState; + public fixed uint Reserved11E0[15]; public RtControl RtControl; public fixed uint Reserved1220[2]; public Size3D RtDepthStencilSize; diff --git a/Ryujinx.Graphics.Gpu/GraphicsConfig.cs b/Ryujinx.Graphics.Gpu/GraphicsConfig.cs index 493dbd7bd..d2f98c7f2 100644 --- a/Ryujinx.Graphics.Gpu/GraphicsConfig.cs +++ b/Ryujinx.Graphics.Gpu/GraphicsConfig.cs @@ -30,8 +30,8 @@ namespace Ryujinx.Graphics.Gpu /// /// Enables or disables fast 2d engine texture copies entirely on CPU when possible. - /// Reduces stuttering and # of textures in games that copy textures around for streaming, - /// as textures will not need to be created for the copy, and the data does not need to be + /// Reduces stuttering and # of textures in games that copy textures around for streaming, + /// as textures will not need to be created for the copy, and the data does not need to be /// flushed from GPU. /// public static bool Fast2DCopy = true; @@ -56,5 +56,15 @@ namespace Ryujinx.Graphics.Gpu /// Enables or disables the shader cache. /// public static bool EnableShaderCache; + + /// + /// Enables or disables shader SPIR-V compilation. + /// + public static bool EnableSpirvCompilationOnVulkan = true; + + /// + /// Enables or disables recompression of compressed textures that are not natively supported by the host. 
+ /// + public static bool EnableTextureRecompression = false; } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Image/Pool.cs b/Ryujinx.Graphics.Gpu/Image/Pool.cs index f54ce1d70..8e2105134 100644 --- a/Ryujinx.Graphics.Gpu/Image/Pool.cs +++ b/Ryujinx.Graphics.Gpu/Image/Pool.cs @@ -1,6 +1,7 @@ using Ryujinx.Cpu.Tracking; using Ryujinx.Graphics.Gpu.Memory; using System; +using System.Runtime.InteropServices; namespace Ryujinx.Graphics.Gpu.Image { @@ -16,6 +17,7 @@ namespace Ryujinx.Graphics.Gpu.Image protected GpuContext Context; protected PhysicalMemory PhysicalMemory; protected int SequenceNumber; + protected int ModifiedSequenceNumber; protected T1[] Items; protected T2[] DescriptorCache; @@ -41,6 +43,9 @@ namespace Ryujinx.Graphics.Gpu.Image private readonly CpuMultiRegionHandle _memoryTracking; private readonly Action _modifiedDelegate; + private int _modifiedSequenceOffset; + private bool _modified; + /// /// Creates a new instance of the GPU resource pool. /// @@ -79,6 +84,16 @@ namespace Ryujinx.Graphics.Gpu.Image return PhysicalMemory.Read(Address + (ulong)id * DescriptorSize); } + /// + /// Gets a reference to the descriptor for a given ID. + /// + /// ID of the descriptor. This is effectively a zero-based index + /// A reference to the descriptor + public ref readonly T2 GetDescriptorRef(int id) + { + return ref MemoryMarshal.Cast(PhysicalMemory.GetSpan(Address + (ulong)id * DescriptorSize, DescriptorSize))[0]; + } + /// /// Gets the GPU resource with the given ID. 
/// @@ -93,7 +108,13 @@ namespace Ryujinx.Graphics.Gpu.Image /// public void SynchronizeMemory() { + _modified = false; _memoryTracking.QueryModified(_modifiedDelegate); + + if (_modified) + { + UpdateModifiedSequence(); + } } /// @@ -103,6 +124,8 @@ namespace Ryujinx.Graphics.Gpu.Image /// Size of the modified region private void RegionModified(ulong mAddress, ulong mSize) { + _modified = true; + if (mAddress < Address) { mAddress = Address; @@ -118,6 +141,15 @@ namespace Ryujinx.Graphics.Gpu.Image InvalidateRangeImpl(mAddress, mSize); } + /// + /// Updates the modified sequence number using the current sequence number and offset, + /// indicating that it has been modified. + /// + protected void UpdateModifiedSequence() + { + ModifiedSequenceNumber = SequenceNumber + _modifiedSequenceOffset; + } + /// /// An action to be performed when a precise memory access occurs to this resource. /// Makes sure that the dirty flags are checked. @@ -129,6 +161,16 @@ namespace Ryujinx.Graphics.Gpu.Image { if (write && Context.SequenceNumber == SequenceNumber) { + if (ModifiedSequenceNumber == SequenceNumber + _modifiedSequenceOffset) + { + // The modified sequence number is offset when PreciseActions occur so that + // users checking it will see an increment and know the pool has changed since + // their last look, even though the main SequenceNumber has not been changed. + + _modifiedSequenceOffset++; + } + + // Force the pool to be checked again the next time it is used. SequenceNumber--; } diff --git a/Ryujinx.Graphics.Gpu/Image/Sampler.cs b/Ryujinx.Graphics.Gpu/Image/Sampler.cs index f8923d349..b70ac9eb9 100644 --- a/Ryujinx.Graphics.Gpu/Image/Sampler.cs +++ b/Ryujinx.Graphics.Gpu/Image/Sampler.cs @@ -8,6 +8,11 @@ namespace Ryujinx.Graphics.Gpu.Image /// class Sampler : IDisposable { + /// + /// True if the sampler is disposed, false otherwise. + /// + public bool IsDisposed { get; private set; } + /// /// Host sampler object. 
/// @@ -101,6 +106,8 @@ namespace Ryujinx.Graphics.Gpu.Image /// public void Dispose() { + IsDisposed = true; + _hostSampler.Dispose(); _anisoSampler?.Dispose(); } diff --git a/Ryujinx.Graphics.Gpu/Image/SamplerPool.cs b/Ryujinx.Graphics.Gpu/Image/SamplerPool.cs index e205ec487..e95800ada 100644 --- a/Ryujinx.Graphics.Gpu/Image/SamplerPool.cs +++ b/Ryujinx.Graphics.Gpu/Image/SamplerPool.cs @@ -48,6 +48,8 @@ namespace Ryujinx.Graphics.Gpu.Image Items[i] = null; } } + + UpdateModifiedSequence(); } SequenceNumber = Context.SequenceNumber; @@ -71,6 +73,39 @@ namespace Ryujinx.Graphics.Gpu.Image return sampler; } + /// + /// Checks if the pool was modified, and returns the last sequence number where a modification was detected. + /// + /// A number that increments each time a modification is detected + public int CheckModified() + { + if (SequenceNumber != Context.SequenceNumber) + { + SequenceNumber = Context.SequenceNumber; + + if (_forcedAnisotropy != GraphicsConfig.MaxAnisotropy) + { + _forcedAnisotropy = GraphicsConfig.MaxAnisotropy; + + for (int i = 0; i < Items.Length; i++) + { + if (Items[i] != null) + { + Items[i].Dispose(); + + Items[i] = null; + } + } + + UpdateModifiedSequence(); + } + + SynchronizeMemory(); + } + + return ModifiedSequenceNumber; + } + /// /// Implementation of the sampler pool range invalidation. /// diff --git a/Ryujinx.Graphics.Gpu/Image/Texture.cs b/Ryujinx.Graphics.Gpu/Image/Texture.cs index aadb4260b..c5c7a09bd 100644 --- a/Ryujinx.Graphics.Gpu/Image/Texture.cs +++ b/Ryujinx.Graphics.Gpu/Image/Texture.cs @@ -100,6 +100,11 @@ namespace Ryujinx.Graphics.Gpu.Image /// public bool AlwaysFlushOnOverlap { get; private set; } + /// + /// Increments when the host texture is swapped, or when the texture is removed from all pools. 
+ /// + public int InvalidatedSequence { get; private set; } + private int _depth; private int _layers; public int FirstLayer { get; private set; } @@ -821,20 +826,25 @@ namespace Ryujinx.Graphics.Gpu.Image depth, levels, layers, - out Span decoded)) + out byte[] decoded)) { string texInfo = $"{Info.Target} {Info.FormatInfo.Format} {Info.Width}x{Info.Height}x{Info.DepthOrLayers} levels {Info.Levels}"; Logger.Debug?.Print(LogClass.Gpu, $"Invalid ASTC texture at 0x{Info.GpuAddress:X} ({texInfo})."); } + if (GraphicsConfig.EnableTextureRecompression) + { + decoded = BCnEncoder.EncodeBC7(decoded, width, height, depth, levels, layers); + } + data = decoded; } else if (!_context.Capabilities.SupportsR4G4Format && Format == Format.R4G4Unorm) { data = PixelConverter.ConvertR4G4ToR4G4B4A4(data); } - else if (!_context.Capabilities.Supports3DTextureCompression && Target == Target.Texture3D) + else if (!TextureCompatibility.HostSupportsBcFormat(Format, Target, _context.Capabilities)) { switch (Format) { @@ -858,6 +868,14 @@ namespace Ryujinx.Graphics.Gpu.Image case Format.Bc5Unorm: data = BCnDecoder.DecodeBC5(data, width, height, depth, levels, layers, Format == Format.Bc5Snorm); break; + case Format.Bc6HSfloat: + case Format.Bc6HUfloat: + data = BCnDecoder.DecodeBC6(data, width, height, depth, levels, layers, Format == Format.Bc6HSfloat); + break; + case Format.Bc7Srgb: + case Format.Bc7Unorm: + data = BCnDecoder.DecodeBC7(data, width, height, depth, levels, layers); + break; } } @@ -1211,16 +1229,18 @@ namespace Ryujinx.Graphics.Gpu.Image if (_arrayViewTexture == null && IsSameDimensionsTarget(target)) { + FormatInfo formatInfo = TextureCompatibility.ToHostCompatibleFormat(Info, _context.Capabilities); + TextureCreateInfo createInfo = new TextureCreateInfo( Info.Width, Info.Height, target == Target.CubemapArray ? 
6 : 1, Info.Levels, Info.Samples, - Info.FormatInfo.BlockWidth, - Info.FormatInfo.BlockHeight, - Info.FormatInfo.BytesPerPixel, - Info.FormatInfo.Format, + formatInfo.BlockWidth, + formatInfo.BlockHeight, + formatInfo.BytesPerPixel, + formatInfo.Format, Info.DepthStencilMode, target, Info.SwizzleR, @@ -1407,6 +1427,7 @@ namespace Ryujinx.Graphics.Gpu.Image DisposeTextures(); HostTexture = hostTexture; + InvalidatedSequence++; } /// @@ -1535,6 +1556,8 @@ namespace Ryujinx.Graphics.Gpu.Image _poolOwners.Clear(); } + + InvalidatedSequence++; } /// diff --git a/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs b/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs index 7ac4e12e2..86eacdb46 100644 --- a/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs +++ b/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs @@ -1,8 +1,12 @@ using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Engine.Types; +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Gpu.Shader; using Ryujinx.Graphics.Shader; using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; namespace Ryujinx.Graphics.Gpu.Image { @@ -31,21 +35,30 @@ namespace Ryujinx.Graphics.Gpu.Image private readonly TextureBindingInfo[][] _textureBindings; private readonly TextureBindingInfo[][] _imageBindings; - private struct TextureStatePerStage + private struct TextureState { public ITexture Texture; public ISampler Sampler; + + public int TextureHandle; + public int SamplerHandle; + public int InvalidatedSequence; + public Texture CachedTexture; + public Sampler CachedSampler; + public int ScaleIndex; + public TextureUsageFlags UsageFlags; } - private readonly TextureStatePerStage[][] _textureState; - private readonly TextureStatePerStage[][] _imageState; + private TextureState[] _textureState; + private TextureState[] _imageState; private int[] _textureBindingsCount; private int[] _imageBindingsCount; - private int _textureBufferIndex; + private 
int _texturePoolSequence; + private int _samplerPoolSequence; - private bool _rebind; + private int _textureBufferIndex; private readonly float[] _scales; private bool _scaleChanged; @@ -72,8 +85,8 @@ namespace Ryujinx.Graphics.Gpu.Image _textureBindings = new TextureBindingInfo[stages][]; _imageBindings = new TextureBindingInfo[stages][]; - _textureState = new TextureStatePerStage[stages][]; - _imageState = new TextureStatePerStage[stages][]; + _textureState = new TextureState[InitialTextureStateSize]; + _imageState = new TextureState[InitialImageStateSize]; _textureBindingsCount = new int[stages]; _imageBindingsCount = new int[stages]; @@ -82,9 +95,6 @@ namespace Ryujinx.Graphics.Gpu.Image { _textureBindings[stage] = new TextureBindingInfo[InitialTextureStateSize]; _imageBindings[stage] = new TextureBindingInfo[InitialImageStateSize]; - - _textureState[stage] = new TextureStatePerStage[InitialTextureStateSize]; - _imageState[stage] = new TextureStatePerStage[InitialImageStateSize]; } } @@ -99,15 +109,6 @@ namespace Ryujinx.Graphics.Gpu.Image if (count > _textureBindings[stage].Length) { Array.Resize(ref _textureBindings[stage], count); - Array.Resize(ref _textureState[stage], count); - } - - int toClear = Math.Max(_textureBindingsCount[stage], count); - TextureStatePerStage[] state = _textureState[stage]; - - for (int i = 0; i < toClear; i++) - { - state[i] = new TextureStatePerStage(); } _textureBindingsCount[stage] = count; @@ -126,15 +127,6 @@ namespace Ryujinx.Graphics.Gpu.Image if (count > _imageBindings[stage].Length) { Array.Resize(ref _imageBindings[stage], count); - Array.Resize(ref _imageState[stage], count); - } - - int toClear = Math.Max(_imageBindingsCount[stage], count); - TextureStatePerStage[] state = _imageState[stage]; - - for (int i = 0; i < toClear; i++) - { - state[i] = new TextureStatePerStage(); } _imageBindingsCount[stage] = count; @@ -142,6 +134,24 @@ namespace Ryujinx.Graphics.Gpu.Image return _imageBindings[stage]; } + /// + /// Sets 
the max binding indexes for textures and images. + /// + /// The maximum texture binding + /// The maximum image binding + public void SetMaxBindings(int maxTextureBinding, int maxImageBinding) + { + if (maxTextureBinding >= _textureState.Length) + { + Array.Resize(ref _textureState, maxTextureBinding + 1); + } + + if (maxImageBinding >= _imageState.Length) + { + Array.Resize(ref _imageState, maxImageBinding + 1); + } + } + /// /// Sets the textures constant buffer index. /// The constant buffer specified holds the texture handles. @@ -222,18 +232,18 @@ namespace Ryujinx.Graphics.Gpu.Image /// Updates the texture scale for a given texture or image. /// /// Start GPU virtual address of the pool - /// The related texture binding + /// The related texture usage flags /// The texture/image binding index /// The active shader stage /// True if the given texture has become blacklisted, indicating that its host texture may have changed. - private bool UpdateScale(Texture texture, TextureBindingInfo binding, int index, ShaderStage stage) + private bool UpdateScale(Texture texture, TextureUsageFlags usageFlags, int index, ShaderStage stage) { float result = 1f; bool changed = false; - if ((binding.Flags & TextureUsageFlags.NeedsScaleValue) != 0 && texture != null) + if ((usageFlags & TextureUsageFlags.NeedsScaleValue) != 0 && texture != null) { - if ((binding.Flags & TextureUsageFlags.ResScaleUnsupported) != 0) + if ((usageFlags & TextureUsageFlags.ResScaleUnsupported) != 0) { changed = texture.ScaleMode != TextureScaleMode.Blacklisted; texture.BlacklistScale(); @@ -284,6 +294,35 @@ namespace Ryujinx.Graphics.Gpu.Image return changed; } + /// + /// Determines if the vertex stage requires a scale value. 
+ /// + private bool VertexRequiresScale() + { + bool requiresScale = false; + + for (int i = 0; i < _textureBindingsCount[0]; i++) + { + if ((_textureBindings[0][i].Flags & TextureUsageFlags.NeedsScaleValue) != 0) + { + return true; + } + } + + if (!requiresScale) + { + for (int i = 0; i < _imageBindingsCount[0]; i++) + { + if ((_imageBindings[0][i].Flags & TextureUsageFlags.NeedsScaleValue) != 0) + { + return true; + } + } + } + + return false; + } + /// /// Uploads texture and image scales to the backend when they are used. /// @@ -295,10 +334,10 @@ namespace Ryujinx.Graphics.Gpu.Image int fragmentIndex = (int)ShaderStage.Fragment - 1; int fragmentTotal = _isCompute ? 0 : (_textureBindingsCount[fragmentIndex] + _imageBindingsCount[fragmentIndex]); - if (total != 0 && fragmentTotal != _lastFragmentTotal) + if (total != 0 && fragmentTotal != _lastFragmentTotal && VertexRequiresScale()) { // Must update scales in the support buffer if: - // - Vertex stage has bindings. + // - Vertex stage has bindings that require scale. // - Fragment stage binding count has been updated since last render scale update. _scaleChanged = true; @@ -323,7 +362,9 @@ namespace Ryujinx.Graphics.Gpu.Image /// Ensures that the bindings are visible to the host GPU. /// Note: this actually performs the binding using the host graphics API. /// - public void CommitBindings() + /// Specialization state for the bound shader + /// True if all bound textures match the current shader specialization state, false otherwise + public bool CommitBindings(ShaderSpecializationState specState) { ulong texturePoolAddress = _texturePoolAddress; @@ -331,10 +372,38 @@ namespace Ryujinx.Graphics.Gpu.Image ? _texturePoolCache.FindOrCreate(_channel, texturePoolAddress, _texturePoolMaximumId) : null; + // Check if the texture pool has been modified since bindings were last committed. + // If it wasn't, then it's possible to avoid looking up textures again when the handle remains the same.
+ bool poolModified = false; + + if (texturePool != null) + { + int texturePoolSequence = texturePool.CheckModified(); + + if (_texturePoolSequence != texturePoolSequence) + { + poolModified = true; + _texturePoolSequence = texturePoolSequence; + } + } + + if (_samplerPool != null) + { + int samplerPoolSequence = _samplerPool.CheckModified(); + + if (_samplerPoolSequence != samplerPoolSequence) + { + poolModified = true; + _samplerPoolSequence = samplerPoolSequence; + } + } + + bool specStateMatches = true; + if (_isCompute) { - CommitTextureBindings(texturePool, ShaderStage.Compute, 0); - CommitImageBindings (texturePool, ShaderStage.Compute, 0); + specStateMatches &= CommitTextureBindings(texturePool, ShaderStage.Compute, 0, poolModified, specState); + specStateMatches &= CommitImageBindings(texturePool, ShaderStage.Compute, 0, poolModified, specState); } else { @@ -342,14 +411,76 @@ namespace Ryujinx.Graphics.Gpu.Image { int stageIndex = (int)stage - 1; - CommitTextureBindings(texturePool, stage, stageIndex); - CommitImageBindings (texturePool, stage, stageIndex); + specStateMatches &= CommitTextureBindings(texturePool, stage, stageIndex, poolModified, specState); + specStateMatches &= CommitImageBindings(texturePool, stage, stageIndex, poolModified, specState); } } CommitRenderScale(); - _rebind = false; + return specStateMatches; + } + + /// + /// Fetch the constant buffers used for a texture to cache. 
+ /// + /// Stage index of the constant buffer + /// The currently cached texture buffer index + /// The currently cached sampler buffer index + /// The currently cached texture buffer data + /// The currently cached sampler buffer data + /// The new texture buffer index + /// The new sampler buffer index + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void UpdateCachedBuffer( + int stageIndex, + ref int cachedTextureBufferIndex, + ref int cachedSamplerBufferIndex, + ref ReadOnlySpan cachedTextureBuffer, + ref ReadOnlySpan cachedSamplerBuffer, + int textureBufferIndex, + int samplerBufferIndex) + { + if (textureBufferIndex != cachedTextureBufferIndex) + { + ref BufferBounds bounds = ref _channel.BufferManager.GetUniformBufferBounds(_isCompute, stageIndex, textureBufferIndex); + + cachedTextureBuffer = MemoryMarshal.Cast(_channel.MemoryManager.Physical.GetSpan(bounds.Address, (int)bounds.Size)); + cachedTextureBufferIndex = textureBufferIndex; + + if (samplerBufferIndex == textureBufferIndex) + { + cachedSamplerBuffer = cachedTextureBuffer; + cachedSamplerBufferIndex = samplerBufferIndex; + } + } + + if (samplerBufferIndex != cachedSamplerBufferIndex) + { + ref BufferBounds bounds = ref _channel.BufferManager.GetUniformBufferBounds(_isCompute, stageIndex, samplerBufferIndex); + + cachedSamplerBuffer = MemoryMarshal.Cast(_channel.MemoryManager.Physical.GetSpan(bounds.Address, (int)bounds.Size)); + cachedSamplerBufferIndex = samplerBufferIndex; + } + } + + /// + /// Counts the total number of texture bindings used by all shader stages. 
+ /// + /// The total amount of textures used + private int GetTextureBindingsCount() + { + int count = 0; + + for (int i = 0; i < _textureBindings.Length; i++) + { + if (_textureBindings[i] != null) + { + count += _textureBindings[i].Length; + } + } + + return count; } /// @@ -358,13 +489,16 @@ namespace Ryujinx.Graphics.Gpu.Image /// /// The current texture pool /// The shader stage using the textures to be bound - /// The stage number of the specified shader stage - private void CommitTextureBindings(TexturePool pool, ShaderStage stage, int stageIndex) + /// The stage number of the specified shader stage + /// True if either the texture or sampler pool was modified, false otherwise + /// Specialization state for the bound shader + /// True if all bound textures match the current shader specialization state, false otherwise + private bool CommitTextureBindings(TexturePool pool, ShaderStage stage, int stageIndex, bool poolModified, ShaderSpecializationState specState) { int textureCount = _textureBindingsCount[stageIndex]; if (textureCount == 0) { - return; + return true; } var samplerPool = _samplerPool; @@ -372,17 +506,27 @@ namespace Ryujinx.Graphics.Gpu.Image if (pool == null) { Logger.Error?.Print(LogClass.Gpu, $"Shader stage \"{stage}\" uses textures, but texture pool was not set."); - return; + return true; } + bool specStateMatches = true; + + int cachedTextureBufferIndex = -1; + int cachedSamplerBufferIndex = -1; + ReadOnlySpan cachedTextureBuffer = Span.Empty; + ReadOnlySpan cachedSamplerBuffer = Span.Empty; + for (int index = 0; index < textureCount; index++) { TextureBindingInfo bindingInfo = _textureBindings[stageIndex][index]; + TextureUsageFlags usageFlags = bindingInfo.Flags; (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(bindingInfo.CbufSlot, _textureBufferIndex); - int packedId = ReadPackedId(stageIndex, bindingInfo.Handle, textureBufferIndex, samplerBufferIndex); - int textureId = UnpackTextureId(packedId); +
UpdateCachedBuffer(stageIndex, ref cachedTextureBufferIndex, ref cachedSamplerBufferIndex, ref cachedTextureBuffer, ref cachedSamplerBuffer, textureBufferIndex, samplerBufferIndex); + + int packedId = TextureHandle.ReadPackedId(bindingInfo.Handle, cachedTextureBuffer, cachedSamplerBuffer); + int textureId = TextureHandle.UnpackTextureId(packedId); int samplerId; if (_samplerIndex == SamplerIndex.ViaHeaderIndex) @@ -391,46 +535,80 @@ namespace Ryujinx.Graphics.Gpu.Image } else { - samplerId = UnpackSamplerId(packedId); + samplerId = TextureHandle.UnpackSamplerId(packedId); } - Texture texture = pool.Get(textureId); + ref TextureState state = ref _textureState[bindingInfo.Binding]; + + if (!poolModified && + state.TextureHandle == textureId && + state.SamplerHandle == samplerId && + state.CachedTexture != null && + state.CachedTexture.InvalidatedSequence == state.InvalidatedSequence && + state.CachedSampler?.IsDisposed != true) + { + // The texture is already bound. + state.CachedTexture.SynchronizeMemory(); + + if ((state.ScaleIndex != index || state.UsageFlags != usageFlags) && + UpdateScale(state.CachedTexture, usageFlags, index, stage)) + { + ITexture hostTextureRebind = state.CachedTexture.GetTargetTexture(bindingInfo.Target); + + state.Texture = hostTextureRebind; + state.ScaleIndex = index; + state.UsageFlags = usageFlags; + + _context.Renderer.Pipeline.SetTextureAndSampler(stage, bindingInfo.Binding, hostTextureRebind, state.Sampler); + } + + continue; + } + + state.TextureHandle = textureId; + state.SamplerHandle = samplerId; + + ref readonly TextureDescriptor descriptor = ref pool.GetForBinding(textureId, out Texture texture); + + specStateMatches &= specState.MatchesTexture(stage, index, descriptor); + + Sampler sampler = _samplerPool?.Get(samplerId); ITexture hostTexture = texture?.GetTargetTexture(bindingInfo.Target); + ISampler hostSampler = sampler?.GetHostSampler(texture); if (hostTexture != null && texture.Target == Target.TextureBuffer) { // Ensure 
that the buffer texture is using the correct buffer as storage. // Buffers are frequently re-created to accomodate larger data, so we need to re-bind // to ensure we're not using a old buffer that was already deleted. - _channel.BufferManager.SetBufferTextureStorage(hostTexture, texture.Range.GetSubRange(0).Address, texture.Size, bindingInfo, bindingInfo.Format, false); + _channel.BufferManager.SetBufferTextureStorage(stage, hostTexture, texture.Range.GetSubRange(0).Address, texture.Size, bindingInfo, bindingInfo.Format, false); } else { - if (_textureState[stageIndex][index].Texture != hostTexture || _rebind) + if (state.Texture != hostTexture || state.Sampler != hostSampler) { - if (UpdateScale(texture, bindingInfo, index, stage)) + if (UpdateScale(texture, usageFlags, index, stage)) { hostTexture = texture?.GetTargetTexture(bindingInfo.Target); } - _textureState[stageIndex][index].Texture = hostTexture; + state.Texture = hostTexture; + state.ScaleIndex = index; + state.UsageFlags = usageFlags; - _context.Renderer.Pipeline.SetTexture(bindingInfo.Binding, hostTexture); + state.Sampler = hostSampler; + + _context.Renderer.Pipeline.SetTextureAndSampler(stage, bindingInfo.Binding, hostTexture, hostSampler); } - Sampler sampler = samplerPool?.Get(samplerId); - - ISampler hostSampler = sampler?.GetHostSampler(texture); - - if (_textureState[stageIndex][index].Sampler != hostSampler || _rebind) - { - _textureState[stageIndex][index].Sampler = hostSampler; - - _context.Renderer.Pipeline.SetSampler(bindingInfo.Binding, hostSampler); - } + state.CachedTexture = texture; + state.CachedSampler = sampler; + state.InvalidatedSequence = texture?.InvalidatedSequence ?? 
0; } } + + return specStateMatches; } /// @@ -440,38 +618,90 @@ namespace Ryujinx.Graphics.Gpu.Image /// The current texture pool /// The shader stage using the textures to be bound /// The stage number of the specified shader stage - private void CommitImageBindings(TexturePool pool, ShaderStage stage, int stageIndex) + /// True if either the texture or sampler pool was modified, false otherwise + /// Specialization state for the bound shader + /// True if all bound images match the current shader specialization state, false otherwise + private bool CommitImageBindings(TexturePool pool, ShaderStage stage, int stageIndex, bool poolModified, ShaderSpecializationState specState) { int imageCount = _imageBindingsCount[stageIndex]; if (imageCount == 0) { - return; + return true; } if (pool == null) { Logger.Error?.Print(LogClass.Gpu, $"Shader stage \"{stage}\" uses images, but texture pool was not set."); - return; + return true; } // Scales for images appear after the texture ones. int baseScaleIndex = _textureBindingsCount[stageIndex]; + int cachedTextureBufferIndex = -1; + int cachedSamplerBufferIndex = -1; + ReadOnlySpan cachedTextureBuffer = Span.Empty; + ReadOnlySpan cachedSamplerBuffer = Span.Empty; + + bool specStateMatches = true; + for (int index = 0; index < imageCount; index++) { TextureBindingInfo bindingInfo = _imageBindings[stageIndex][index]; + TextureUsageFlags usageFlags = bindingInfo.Flags; + int scaleIndex = baseScaleIndex + index; (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(bindingInfo.CbufSlot, _textureBufferIndex); - int packedId = ReadPackedId(stageIndex, bindingInfo.Handle, textureBufferIndex, samplerBufferIndex); - int textureId = UnpackTextureId(packedId); + UpdateCachedBuffer(stageIndex, ref cachedTextureBufferIndex, ref cachedSamplerBufferIndex, ref cachedTextureBuffer, ref cachedSamplerBuffer, textureBufferIndex, samplerBufferIndex); - Texture texture = pool.Get(textureId); + int packedId =
TextureHandle.ReadPackedId(bindingInfo.Handle, cachedTextureBuffer, cachedSamplerBuffer); + int textureId = TextureHandle.UnpackTextureId(packedId); - ITexture hostTexture = texture?.GetTargetTexture(bindingInfo.Target); + ref TextureState state = ref _imageState[bindingInfo.Binding]; bool isStore = bindingInfo.Flags.HasFlag(TextureUsageFlags.ImageStore); + if (!poolModified && + state.TextureHandle == textureId && + state.CachedTexture != null && + state.CachedTexture.InvalidatedSequence == state.InvalidatedSequence) + { + Texture cachedTexture = state.CachedTexture; + + // The texture is already bound. + cachedTexture.SynchronizeMemory(); + + if (isStore) + { + cachedTexture?.SignalModified(); + } + + if ((state.ScaleIndex != index || state.UsageFlags != usageFlags) && + UpdateScale(state.CachedTexture, usageFlags, scaleIndex, stage)) + { + ITexture hostTextureRebind = state.CachedTexture.GetTargetTexture(bindingInfo.Target); + + Format format = bindingInfo.Format == 0 ? cachedTexture.Format : bindingInfo.Format; + + state.Texture = hostTextureRebind; + state.ScaleIndex = scaleIndex; + state.UsageFlags = usageFlags; + + _context.Renderer.Pipeline.SetImage(bindingInfo.Binding, hostTextureRebind, format); + } + + continue; + } + + state.TextureHandle = textureId; + + ref readonly TextureDescriptor descriptor = ref pool.GetForBinding(textureId, out Texture texture); + + specStateMatches &= specState.MatchesImage(stage, index, descriptor); + + ITexture hostTexture = texture?.GetTargetTexture(bindingInfo.Target); + if (hostTexture != null && texture.Target == Target.TextureBuffer) { // Ensure that the buffer texture is using the correct buffer as storage. 
@@ -485,7 +715,7 @@ namespace Ryujinx.Graphics.Gpu.Image format = texture.Format; } - _channel.BufferManager.SetBufferTextureStorage(hostTexture, texture.Range.GetSubRange(0).Address, texture.Size, bindingInfo, format, true); + _channel.BufferManager.SetBufferTextureStorage(stage, hostTexture, texture.Range.GetSubRange(0).Address, texture.Size, bindingInfo, format, true); } else { @@ -494,14 +724,16 @@ namespace Ryujinx.Graphics.Gpu.Image texture?.SignalModified(); } - if (_imageState[stageIndex][index].Texture != hostTexture || _rebind) + if (state.Texture != hostTexture) { - if (UpdateScale(texture, bindingInfo, baseScaleIndex + index, stage)) + if (UpdateScale(texture, usageFlags, scaleIndex, stage)) { hostTexture = texture?.GetTargetTexture(bindingInfo.Target); } - _imageState[stageIndex][index].Texture = hostTexture; + state.Texture = hostTexture; + state.ScaleIndex = scaleIndex; + state.UsageFlags = usageFlags; Format format = bindingInfo.Format; @@ -512,8 +744,13 @@ namespace Ryujinx.Graphics.Gpu.Image _context.Renderer.Pipeline.SetImage(bindingInfo.Binding, hostTexture, format); } + + state.CachedTexture = texture; + state.InvalidatedSequence = texture?.InvalidatedSequence ?? 
0; } } + + return specStateMatches; } /// @@ -537,7 +774,7 @@ namespace Ryujinx.Graphics.Gpu.Image (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(cbufSlot, bufferIndex); int packedId = ReadPackedId(stageIndex, handle, textureBufferIndex, samplerBufferIndex); - int textureId = UnpackTextureId(packedId); + int textureId = TextureHandle.UnpackTextureId(packedId); ulong poolAddress = _channel.MemoryManager.Translate(poolGpuVa); @@ -555,6 +792,7 @@ namespace Ryujinx.Graphics.Gpu.Image /// Index of the constant buffer holding the texture handles /// Index of the constant buffer holding the sampler handles /// The packed texture and sampler ID (the real texture handle) + [MethodImpl(MethodImplOptions.AggressiveInlining)] private int ReadPackedId(int stageIndex, int wordOffset, int textureBufferIndex, int samplerBufferIndex) { (int textureWordOffset, int samplerWordOffset, TextureHandleType handleType) = TextureHandle.UnpackOffsets(wordOffset); @@ -590,32 +828,13 @@ namespace Ryujinx.Graphics.Gpu.Image return handle; } - /// - /// Unpacks the texture ID from the real texture handle. - /// - /// The real texture handle - /// The texture ID - private static int UnpackTextureId(int packedId) - { - return (packedId >> 0) & 0xfffff; - } - - /// - /// Unpacks the sampler ID from the real texture handle. - /// - /// The real texture handle - /// The sampler ID - private static int UnpackSamplerId(int packedId) - { - return (packedId >> 20) & 0xfff; - } - /// /// Force all bound textures and images to be rebound the next time CommitBindings is called. 
/// public void Rebind() { - _rebind = true; + Array.Clear(_textureState); + Array.Clear(_imageState); } /// diff --git a/Ryujinx.Graphics.Gpu/Image/TextureCache.cs b/Ryujinx.Graphics.Gpu/Image/TextureCache.cs index 045410571..ba863a1e1 100644 --- a/Ryujinx.Graphics.Gpu/Image/TextureCache.cs +++ b/Ryujinx.Graphics.Gpu/Image/TextureCache.cs @@ -349,6 +349,7 @@ namespace Ryujinx.Graphics.Gpu.Image /// GPU memory manager where the texture is mapped /// Depth-stencil buffer texture to find or create /// Size of the depth-stencil texture + /// Indicates if the texture might be accessed with a non-zero layer index /// Number of samples in the X direction, for MSAA /// Number of samples in the Y direction, for MSAA /// A hint indicating the minimum used size for the texture @@ -357,6 +358,7 @@ namespace Ryujinx.Graphics.Gpu.Image MemoryManager memoryManager, RtDepthStencilState dsState, Size3D size, + bool layered, int samplesInX, int samplesInY, Size sizeHint) @@ -364,9 +366,24 @@ namespace Ryujinx.Graphics.Gpu.Image int gobBlocksInY = dsState.MemoryLayout.UnpackGobBlocksInY(); int gobBlocksInZ = dsState.MemoryLayout.UnpackGobBlocksInZ(); - Target target = (samplesInX | samplesInY) != 1 - ? Target.Texture2DMultisample - : Target.Texture2D; + Target target; + + if (dsState.MemoryLayout.UnpackIsTarget3D()) + { + target = Target.Texture3D; + } + else if ((samplesInX | samplesInY) != 1) + { + target = size.Depth > 1 && layered + ? Target.Texture2DMultisampleArray + : Target.Texture2DMultisample; + } + else + { + target = size.Depth > 1 && layered + ? 
Target.Texture2DArray + : Target.Texture2D; + } FormatInfo formatInfo = dsState.Format.Convert(); diff --git a/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs b/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs index 61b48dc4d..ea9801bbb 100644 --- a/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs +++ b/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs @@ -71,11 +71,15 @@ namespace Ryujinx.Graphics.Gpu.Image { if (info.FormatInfo.Format.IsAstcUnorm()) { - return new FormatInfo(Format.R8G8B8A8Unorm, 1, 1, 4, 4); + return GraphicsConfig.EnableTextureRecompression + ? new FormatInfo(Format.Bc7Unorm, 4, 4, 16, 4) + : new FormatInfo(Format.R8G8B8A8Unorm, 1, 1, 4, 4); } else if (info.FormatInfo.Format.IsAstcSrgb()) { - return new FormatInfo(Format.R8G8B8A8Srgb, 1, 1, 4, 4); + return GraphicsConfig.EnableTextureRecompression + ? new FormatInfo(Format.Bc7Srgb, 4, 4, 16, 4) + : new FormatInfo(Format.R8G8B8A8Srgb, 1, 1, 4, 4); } } @@ -84,9 +88,9 @@ namespace Ryujinx.Graphics.Gpu.Image return new FormatInfo(Format.R4G4B4A4Unorm, 1, 1, 2, 4); } - if (!caps.Supports3DTextureCompression && info.Target == Target.Texture3D) + if (!HostSupportsBcFormat(info.FormatInfo.Format, info.Target, caps)) { - // The host API does not support 3D compressed formats. + // The host API does not support this compressed format. // We assume software decompression will be done for those textures, // and so we adjust the format here to match the decompressor output.
switch (info.FormatInfo.Format) @@ -94,10 +98,12 @@ namespace Ryujinx.Graphics.Gpu.Image case Format.Bc1RgbaSrgb: case Format.Bc2Srgb: case Format.Bc3Srgb: + case Format.Bc7Srgb: return new FormatInfo(Format.R8G8B8A8Srgb, 1, 1, 4, 4); case Format.Bc1RgbaUnorm: case Format.Bc2Unorm: case Format.Bc3Unorm: + case Format.Bc7Unorm: return new FormatInfo(Format.R8G8B8A8Unorm, 1, 1, 4, 4); case Format.Bc4Unorm: return new FormatInfo(Format.R8Unorm, 1, 1, 1, 1); @@ -107,12 +113,50 @@ namespace Ryujinx.Graphics.Gpu.Image return new FormatInfo(Format.R8G8Unorm, 1, 1, 2, 2); case Format.Bc5Snorm: return new FormatInfo(Format.R8G8Snorm, 1, 1, 2, 2); + case Format.Bc6HSfloat: + case Format.Bc6HUfloat: + return new FormatInfo(Format.R16G16B16A16Float, 1, 1, 8, 4); } } return info.FormatInfo; } + /// + /// Checks if the host API supports a given texture compression format of the BC family. + /// + /// BC format to be checked + /// Target usage of the texture + /// Host GPU Capabilities + /// True if the texture host supports the format with the given target usage, false otherwise + public static bool HostSupportsBcFormat(Format format, Target target, Capabilities caps) + { + bool not3DOr3DCompressionSupported = target != Target.Texture3D || caps.Supports3DTextureCompression; + + switch (format) + { + case Format.Bc1RgbaSrgb: + case Format.Bc1RgbaUnorm: + case Format.Bc2Srgb: + case Format.Bc2Unorm: + case Format.Bc3Srgb: + case Format.Bc3Unorm: + return caps.SupportsBc123Compression && not3DOr3DCompressionSupported; + case Format.Bc4Unorm: + case Format.Bc4Snorm: + case Format.Bc5Unorm: + case Format.Bc5Snorm: + return caps.SupportsBc45Compression && not3DOr3DCompressionSupported; + case Format.Bc6HSfloat: + case Format.Bc6HUfloat: + case Format.Bc7Srgb: + case Format.Bc7Unorm: + return caps.SupportsBc67Compression && not3DOr3DCompressionSupported; + } + + return true; + } + /// /// Determines whether a texture can flush its data back to guest memory. 
/// @@ -744,7 +788,7 @@ namespace Ryujinx.Graphics.Gpu.Image /// True if the texture target and samples count matches, false otherwise public static bool TargetAndSamplesCompatible(TextureInfo lhs, TextureInfo rhs) { - return lhs.Target == rhs.Target && + return lhs.Target == rhs.Target && lhs.SamplesInX == rhs.SamplesInX && lhs.SamplesInY == rhs.SamplesInY; } diff --git a/Ryujinx.Graphics.Gpu/Image/TextureManager.cs b/Ryujinx.Graphics.Gpu/Image/TextureManager.cs index a1c292912..628c31596 100644 --- a/Ryujinx.Graphics.Gpu/Image/TextureManager.cs +++ b/Ryujinx.Graphics.Gpu/Image/TextureManager.cs @@ -1,5 +1,6 @@ using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Engine.Types; +using Ryujinx.Graphics.Gpu.Shader; using System; namespace Ryujinx.Graphics.Gpu.Image @@ -10,9 +11,11 @@ namespace Ryujinx.Graphics.Gpu.Image class TextureManager : IDisposable { private readonly GpuContext _context; + private readonly GpuChannel _channel; private readonly TextureBindingsManager _cpBindingsManager; private readonly TextureBindingsManager _gpBindingsManager; + private readonly TexturePoolCache _texturePoolCache; private readonly Texture[] _rtColors; private readonly ITexture[] _rtHostColors; @@ -35,6 +38,7 @@ namespace Ryujinx.Graphics.Gpu.Image public TextureManager(GpuContext context, GpuChannel channel) { _context = context; + _channel = channel; TexturePoolCache texturePoolCache = new TexturePoolCache(context); @@ -43,6 +47,7 @@ namespace Ryujinx.Graphics.Gpu.Image _cpBindingsManager = new TextureBindingsManager(context, channel, texturePoolCache, scales, isCompute: true); _gpBindingsManager = new TextureBindingsManager(context, channel, texturePoolCache, scales, isCompute: false); + _texturePoolCache = texturePoolCache; _rtColors = new Texture[Constants.TotalRenderTargets]; _rtHostColors = new ITexture[Constants.TotalRenderTargets]; @@ -99,6 +104,16 @@ namespace Ryujinx.Graphics.Gpu.Image _cpBindingsManager.SetTextureBufferIndex(index); } + /// + /// Sets the max 
binding indexes on the compute pipeline. + /// + /// The maximum texture binding + /// The maximum image binding + public void SetComputeMaxBindings(int maxTextureBinding, int maxImageBinding) + { + _cpBindingsManager.SetMaxBindings(maxTextureBinding, maxImageBinding); + } + /// /// Sets the texture constant buffer index on the graphics pipeline. /// @@ -108,6 +123,16 @@ namespace Ryujinx.Graphics.Gpu.Image _gpBindingsManager.SetTextureBufferIndex(index); } + /// + /// Sets the max binding indexes on the graphics pipeline. + /// + /// The maximum texture binding + /// The maximum image binding + public void SetGraphicsMaxBindings(int maxTextureBinding, int maxImageBinding) + { + _gpBindingsManager.SetMaxBindings(maxTextureBinding, maxImageBinding); + } + /// /// Sets the current sampler pool on the compute pipeline. /// @@ -335,25 +360,48 @@ namespace Ryujinx.Graphics.Gpu.Image /// /// Commits bindings on the compute pipeline. /// - public void CommitComputeBindings() + /// Specialization state for the bound shader + /// True if all bound textures match the current shader specialization state, false otherwise + public bool CommitComputeBindings(ShaderSpecializationState specState) { // Every time we switch between graphics and compute work, // we must rebind everything. // Since compute work happens less often, we always do that // before and after the compute dispatch. _cpBindingsManager.Rebind(); - _cpBindingsManager.CommitBindings(); + bool result = _cpBindingsManager.CommitBindings(specState); _gpBindingsManager.Rebind(); + + return result; } /// /// Commits bindings on the graphics pipeline. 
/// - public void CommitGraphicsBindings() + /// Specialization state for the bound shader + /// True if all bound textures match the current shader specialization state, false otherwise + public bool CommitGraphicsBindings(ShaderSpecializationState specState) { - _gpBindingsManager.CommitBindings(); + bool result = _gpBindingsManager.CommitBindings(specState); UpdateRenderTargets(); + + return result; + } + + /// + /// Returns a texture pool from the cache, with the given address and maximum id. + /// + /// GPU virtual address of the texture pool + /// Maximum ID of the texture pool + /// The texture pool + public TexturePool GetTexturePool(ulong poolGpuVa, int maximumId) + { + ulong poolAddress = _channel.MemoryManager.Translate(poolGpuVa); + + TexturePool texturePool = _texturePoolCache.FindOrCreate(_channel, poolAddress, maximumId); + + return texturePool; } /// diff --git a/Ryujinx.Graphics.Gpu/Image/TexturePool.cs b/Ryujinx.Graphics.Gpu/Image/TexturePool.cs index 10a6ff82a..75974c43b 100644 --- a/Ryujinx.Graphics.Gpu/Image/TexturePool.cs +++ b/Ryujinx.Graphics.Gpu/Image/TexturePool.cs @@ -14,6 +14,7 @@ namespace Ryujinx.Graphics.Gpu.Image { private readonly GpuChannel _channel; private readonly ConcurrentQueue _dereferenceQueue = new ConcurrentQueue(); + private TextureDescriptor _defaultDescriptor; /// /// Intrusive linked list node used on the texture pool cache. @@ -32,6 +33,62 @@ namespace Ryujinx.Graphics.Gpu.Image _channel = channel; } + /// + /// Gets the texture descriptor and texture with the given ID with no bounds check or synchronization. + /// + /// ID of the texture.
This is effectively a zero-based index + /// The texture with the given ID + /// The texture descriptor with the given ID + private ref readonly TextureDescriptor GetInternal(int id, out Texture texture) + { + texture = Items[id]; + + ref readonly TextureDescriptor descriptor = ref GetDescriptorRef(id); + + if (texture == null) + { + TextureInfo info = GetInfo(descriptor, out int layerSize); + + ProcessDereferenceQueue(); + + texture = PhysicalMemory.TextureCache.FindOrCreateTexture(_channel.MemoryManager, TextureSearchFlags.ForSampler, info, layerSize); + + // If this happens, then the texture address is invalid, we can't add it to the cache. + if (texture == null) + { + return ref descriptor; + } + + texture.IncrementReferenceCount(this, id); + + Items[id] = texture; + + DescriptorCache[id] = descriptor; + } + else + { + if (texture.ChangedSize) + { + // Texture changed size at one point - it may be a different size than the sampler expects. + // This can be triggered when the size is changed by a size hint on copy or draw, but the texture has been sampled before. + + int baseLevel = descriptor.UnpackBaseLevel(); + int width = Math.Max(1, descriptor.UnpackWidth() >> baseLevel); + int height = Math.Max(1, descriptor.UnpackHeight() >> baseLevel); + + if (texture.Info.Width != width || texture.Info.Height != height) + { + texture.ChangeSize(width, height, texture.Info.DepthOrLayers); + } + } + + // Memory is automatically synchronized on texture creation. + texture.SynchronizeMemory(); + } + + return ref descriptor; + } + /// /// Gets the texture with the given ID. 
/// @@ -51,56 +108,49 @@ namespace Ryujinx.Graphics.Gpu.Image SynchronizeMemory(); } - Texture texture = Items[id]; - - if (texture == null) - { - TextureDescriptor descriptor = GetDescriptor(id); - - TextureInfo info = GetInfo(descriptor, out int layerSize); - - ProcessDereferenceQueue(); - - texture = PhysicalMemory.TextureCache.FindOrCreateTexture(_channel.MemoryManager, TextureSearchFlags.ForSampler, info, layerSize); - - // If this happens, then the texture address is invalid, we can't add it to the cache. - if (texture == null) - { - return null; - } - - texture.IncrementReferenceCount(this, id); - - Items[id] = texture; - - DescriptorCache[id] = descriptor; - } - else - { - if (texture.ChangedSize) - { - // Texture changed size at one point - it may be a different size than the sampler expects. - // This can be triggered when the size is changed by a size hint on copy or draw, but the texture has been sampled before. - - TextureDescriptor descriptor = GetDescriptor(id); - - int baseLevel = descriptor.UnpackBaseLevel(); - int width = Math.Max(1, descriptor.UnpackWidth() >> baseLevel); - int height = Math.Max(1, descriptor.UnpackHeight() >> baseLevel); - - if (texture.Info.Width != width || texture.Info.Height != height) - { - texture.ChangeSize(width, height, texture.Info.DepthOrLayers); - } - } - - // Memory is automatically synchronized on texture creation. - texture.SynchronizeMemory(); - } + GetInternal(id, out Texture texture); return texture; } + /// + /// Gets the texture descriptor and texture with the given ID. + /// + /// + /// This method assumes that the pool has been manually synchronized before doing binding. + /// + /// ID of the texture. 
This is effectively a zero-based index + /// The texture with the given ID + /// The texture descriptor with the given ID + public ref readonly TextureDescriptor GetForBinding(int id, out Texture texture) + { + if ((uint)id >= Items.Length) + { + texture = null; + return ref _defaultDescriptor; + } + + // When getting for binding, assume the pool has already been synchronized. + + return ref GetInternal(id, out texture); + } + + /// + /// Checks if the pool was modified, and returns the last sequence number where a modification was detected. + /// + /// A number that increments each time a modification is detected + public int CheckModified() + { + if (SequenceNumber != Context.SequenceNumber) + { + SequenceNumber = Context.SequenceNumber; + + SynchronizeMemory(); + } + + return ModifiedSequenceNumber; + } + /// /// Forcibly remove a texture from this pool's items. /// If deferred, the dereference will be queued to occur on the render thread. @@ -175,7 +225,7 @@ namespace Ryujinx.Graphics.Gpu.Image /// The texture descriptor /// Layer size for textures using a sub-range of mipmap levels, otherwise 0 /// The texture information - private TextureInfo GetInfo(TextureDescriptor descriptor, out int layerSize) + private TextureInfo GetInfo(in TextureDescriptor descriptor, out int layerSize) { int depthOrLayers = descriptor.UnpackDepth(); int levels = descriptor.UnpackLevels(); diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs index 71f202aed..9f1f88b1e 100644 --- a/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs @@ -378,6 +378,25 @@ namespace Ryujinx.Graphics.Gpu.Memory return _gpUniformBuffers[stage].Buffers[index].Address; } + /// + /// Gets the bounds of the uniform buffer currently bound at the given index. 
+ /// + /// Indicates whenever the uniform is requested by the 3D or compute engine + /// Index of the shader stage, if the uniform is for the 3D engine + /// Index of the uniform buffer binding + /// The uniform buffer bounds, or an undefined value if the buffer is not currently bound + public ref BufferBounds GetUniformBufferBounds(bool isCompute, int stage, int index) + { + if (isCompute) + { + return ref _cpUniformBuffers.Buffers[index]; + } + else + { + return ref _gpUniformBuffers[stage].Buffers[index]; + } + } + /// /// Ensures that the compute engine bindings are visible to the host GPU. /// Note: this actually performs the binding using the host graphics API. @@ -416,7 +435,7 @@ namespace Ryujinx.Graphics.Gpu.Memory } else { - _context.Renderer.Pipeline.SetTexture(binding.BindingInfo.Binding, binding.Texture); + _context.Renderer.Pipeline.SetTextureAndSampler(binding.Stage, binding.BindingInfo.Binding, binding.Texture, null); } } @@ -700,17 +719,25 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Sets the buffer storage of a buffer texture. This will be bound when the buffer manager commits bindings. 
/// + /// Shader stage accessing the texture /// Buffer texture /// Address of the buffer in memory /// Size of the buffer in bytes /// Binding info for the buffer texture /// Format of the buffer texture /// Whether the binding is for an image or a sampler - public void SetBufferTextureStorage(ITexture texture, ulong address, ulong size, TextureBindingInfo bindingInfo, Format format, bool isImage) + public void SetBufferTextureStorage( + ShaderStage stage, + ITexture texture, + ulong address, + ulong size, + TextureBindingInfo bindingInfo, + Format format, + bool isImage) { _channel.MemoryManager.Physical.BufferCache.CreateBuffer(address, size); - _bufferTextures.Add(new BufferTextureBinding(texture, address, size, bindingInfo, format, isImage)); + _bufferTextures.Add(new BufferTextureBinding(stage, texture, address, size, bindingInfo, format, isImage)); } /// diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferTextureBinding.cs b/Ryujinx.Graphics.Gpu/Memory/BufferTextureBinding.cs index cf0d225e8..2a1408708 100644 --- a/Ryujinx.Graphics.Gpu/Memory/BufferTextureBinding.cs +++ b/Ryujinx.Graphics.Gpu/Memory/BufferTextureBinding.cs @@ -1,5 +1,6 @@ using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Image; +using Ryujinx.Graphics.Shader; namespace Ryujinx.Graphics.Gpu.Memory { @@ -8,6 +9,11 @@ namespace Ryujinx.Graphics.Gpu.Memory /// struct BufferTextureBinding { + /// + /// Shader stage accessing the texture. + /// + public ShaderStage Stage { get; } + /// /// The buffer texture. /// @@ -41,14 +47,23 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Create a new buffer texture binding. 
/// + /// Shader stage accessing the texture /// Buffer texture /// Base address /// Size in bytes /// Binding info /// Binding format /// Whether the binding is for an image or a sampler - public BufferTextureBinding(ITexture texture, ulong address, ulong size, TextureBindingInfo bindingInfo, Format format, bool isImage) + public BufferTextureBinding( + ShaderStage stage, + ITexture texture, + ulong address, + ulong size, + TextureBindingInfo bindingInfo, + Format format, + bool isImage) { + Stage = stage; Texture = texture; Address = address; Size = size; diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs index 0ac6160d9..9d246b5cb 100644 --- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs @@ -252,6 +252,11 @@ namespace Ryujinx.Graphics.Gpu.Memory Write(va, MemoryMarshal.Cast(MemoryMarshal.CreateSpan(ref value, 1))); } + public void WriteUntracked(ulong va, T value) where T : unmanaged + { + WriteUntracked(va, MemoryMarshal.Cast(MemoryMarshal.CreateSpan(ref value, 1))); + } + /// /// Writes data to GPU mapped memory. 
/// diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs index 4de6eff91..92f15139c 100644 --- a/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs +++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Migration.cs @@ -105,7 +105,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache entry.Header.GpuAccessorHeader.ComputeLocalMemorySize, entry.Header.GpuAccessorHeader.ComputeSharedMemorySize); - ShaderSpecializationState specState = new ShaderSpecializationState(computeState); + ShaderSpecializationState specState = new ShaderSpecializationState(ref computeState); foreach (var td in entry.TextureDescriptors) { @@ -163,11 +163,20 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache _ => PrimitiveTopology.Points }; + Array32 attributeTypes = default; + GpuChannelGraphicsState graphicsState = new GpuChannelGraphicsState( accessorHeader.StateFlags.HasFlag(GuestGpuStateFlags.EarlyZForce), topology, tessMode, - false); + false, + false, + false, + 1f, + false, + CompareOp.Always, + 0f, + ref attributeTypes); TransformFeedbackDescriptor[] tfdNew = null; @@ -189,7 +198,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache } } - ShaderSpecializationState specState = new ShaderSpecializationState(graphicsState, tfdNew); + ProgramPipelineState pipelineState = default; + + ShaderSpecializationState specState = new ShaderSpecializationState(ref graphicsState, ref pipelineState, tfdNew); for (int i = 0; i < entries.Length; i++) { diff --git a/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs b/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs index 3b4c65f3d..69fcb2780 100644 --- a/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs +++ b/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs @@ -35,6 +35,8 @@ namespace Ryujinx.Graphics.Gpu.Shader HostProgram = hostProgram; SpecializationState = specializationState; Shaders = shaders; + + SpecializationState.Prepare(shaders); } /// diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs 
b/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs index 5c5e41c69..98655ed68 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/BackgroundDiskCacheWriter.cs @@ -83,7 +83,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache { _context = context; _hostStorage = hostStorage; - _fileWriterWorkerQueue = new AsyncWorkQueue(ProcessTask, "Gpu.BackgroundDiskCacheWriter"); + _fileWriterWorkerQueue = new AsyncWorkQueue(ProcessTask, "GPU.BackgroundDiskCacheWriter"); } /// diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs index bc63f714d..81569080b 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs @@ -1,6 +1,8 @@ using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Image; using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; using System; using System.Runtime.InteropServices; @@ -16,7 +18,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private readonly ShaderSpecializationState _oldSpecState; private readonly ShaderSpecializationState _newSpecState; private readonly int _stageIndex; - private ResourceCounts _resourceCounts; + private readonly bool _isVulkan; + private readonly ResourceCounts _resourceCounts; /// /// Creates a new instance of the cached GPU state accessor for shader translation. 
@@ -34,13 +37,14 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache ShaderSpecializationState oldSpecState, ShaderSpecializationState newSpecState, ResourceCounts counts, - int stageIndex) : base(context) + int stageIndex) : base(context, counts, stageIndex) { _data = data; _cb1Data = cb1Data; _oldSpecState = oldSpecState; _newSpecState = newSpecState; _stageIndex = stageIndex; + _isVulkan = context.Capabilities.Api == TargetApi.Vulkan; _resourceCounts = counts; } @@ -68,27 +72,33 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache } /// - public int QueryBindingConstantBuffer(int index) + public AlphaTestOp QueryAlphaTestCompare() { - return _resourceCounts.UniformBuffersCount++; + if (!_isVulkan || !_oldSpecState.GraphicsState.AlphaTestEnable) + { + return AlphaTestOp.Always; + } + + return _oldSpecState.GraphicsState.AlphaTestCompare switch + { + CompareOp.Never or CompareOp.NeverGl => AlphaTestOp.Never, + CompareOp.Less or CompareOp.LessGl => AlphaTestOp.Less, + CompareOp.Equal or CompareOp.EqualGl => AlphaTestOp.Equal, + CompareOp.LessOrEqual or CompareOp.LessOrEqualGl => AlphaTestOp.LessOrEqual, + CompareOp.Greater or CompareOp.GreaterGl => AlphaTestOp.Greater, + CompareOp.NotEqual or CompareOp.NotEqualGl => AlphaTestOp.NotEqual, + CompareOp.GreaterOrEqual or CompareOp.GreaterOrEqualGl => AlphaTestOp.GreaterOrEqual, + _ => AlphaTestOp.Always + }; } /// - public int QueryBindingStorageBuffer(int index) - { - return _resourceCounts.StorageBuffersCount++; - } + public float QueryAlphaTestReference() => _oldSpecState.GraphicsState.AlphaTestReference; /// - public int QueryBindingTexture(int index) + public AttributeType QueryAttributeType(int location) { - return _resourceCounts.TexturesCount++; - } - - /// - public int QueryBindingImage(int index) - { - return _resourceCounts.ImagesCount++; + return _oldSpecState.GraphicsState.AttributeTypes[location]; } /// @@ -120,6 +130,18 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache return 
ConvertToInputTopology(_oldSpecState.GraphicsState.Topology, _oldSpecState.GraphicsState.TessellationMode); } + /// + public bool QueryProgramPointSize() + { + return _oldSpecState.GraphicsState.ProgramPointSizeEnable; + } + + /// + public float QueryPointSize() + { + return _oldSpecState.GraphicsState.PointSize; + } + /// public bool QueryTessCw() { @@ -160,6 +182,12 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache return _oldSpecState.GetCoordNormalized(_stageIndex, handle, cbufSlot); } + /// + public bool QueryTransformDepthMinusOneToOne() + { + return _oldSpecState.GraphicsState.DepthMode; + } + /// public bool QueryTransformFeedbackEnabled() { diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs index 4e338094f..01034b495 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGuestStorage.cs @@ -14,7 +14,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private const uint TocMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'G' << 24); private const ushort VersionMajor = 1; - private const ushort VersionMinor = 0; + private const ushort VersionMinor = 1; private const uint VersionPacked = ((uint)VersionMajor << 16) | VersionMinor; private const string TocFileName = "guest.toc"; @@ -193,8 +193,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// Guest TOC file stream /// Guest data file stream /// Guest shader index - /// Tuple with the guest code and constant buffer 1 data, respectively - public (byte[], byte[]) LoadShader(Stream tocFileStream, Stream dataFileStream, int index) + /// Guest code and constant buffer 1 data + public GuestCodeAndCbData LoadShader(Stream tocFileStream, Stream dataFileStream, int index) { if (_cache == null || index >= _cache.Length) { @@ -226,7 +226,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache _cache[index] = (guestCode, cb1Data); } - 
return (guestCode, cb1Data); + return new GuestCodeAndCbData(guestCode, cb1Data); } /// diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index 5d99957f0..a25b2d9a4 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -1,5 +1,6 @@ using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; using System; using System.IO; using System.Numerics; @@ -19,9 +20,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private const uint TexdMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'D' << 24); private const ushort FileFormatVersionMajor = 1; - private const ushort FileFormatVersionMinor = 1; + private const ushort FileFormatVersionMinor = 2; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; - private const uint CodeGenVersion = 1; + private const uint CodeGenVersion = 9; private const string SharedTocFileName = "shared.toc"; private const string SharedDataFileName = "shared.data"; @@ -77,9 +78,14 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache public ulong Offset; /// - /// Size. + /// Size of uncompressed data. /// - public uint Size; + public uint UncompressedSize; + + /// + /// Size of compressed data. + /// + public uint CompressedSize; } /// @@ -196,6 +202,14 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private static string GetHostFileName(GpuContext context) { string apiName = context.Capabilities.Api.ToString().ToLowerInvariant(); + + // We are just storing SPIR-V directly on Vulkan, so the code won't change per vendor. + // We can just have a single file for all vendors. 
+ if (context.Capabilities.Api == TargetApi.Vulkan) + { + return apiName; + } + string vendorName = RemoveInvalidCharacters(context.Capabilities.VendorName.ToLowerInvariant()); return $"{apiName}_{vendorName}"; } @@ -324,7 +338,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache stagesBitMask = 1; } - CachedShaderStage[] shaders = new CachedShaderStage[isCompute ? 1 : Constants.ShaderStages + 1]; + GuestCodeAndCbData?[] guestShaders = new GuestCodeAndCbData?[isCompute ? 1 : Constants.ShaderStages + 1]; DataEntryPerStage stageEntry = new DataEntryPerStage(); @@ -334,15 +348,11 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache dataReader.Read(ref stageEntry); - ShaderProgramInfo info = stageIndex != 0 || isCompute ? ReadShaderProgramInfo(ref dataReader) : null; - - (byte[] guestCode, byte[] cb1Data) = _guestStorage.LoadShader( + guestShaders[stageIndex] = _guestStorage.LoadShader( guestTocFileStream, guestDataFileStream, stageEntry.GuestCodeIndex); - shaders[stageIndex] = new CachedShaderStage(info, guestCode, cb1Data); - stagesBitMask &= ~(1u << stageIndex); } @@ -351,17 +361,38 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache if (loadHostCache) { - byte[] hostCode = ReadHostCode(context, ref hostTocFileStream, ref hostDataFileStream, programIndex); + (byte[] hostCode, CachedShaderStage[] shaders) = ReadHostCode( + context, + ref hostTocFileStream, + ref hostDataFileStream, + guestShaders, + programIndex); if (hostCode != null) { bool hasFragmentShader = shaders.Length > 5 && shaders[5] != null; int fragmentOutputMap = hasFragmentShader ? shaders[5].Info.FragmentOutputMap : -1; - IProgram hostProgram = context.Renderer.LoadProgramBinary(hostCode, hasFragmentShader, new ShaderInfo(fragmentOutputMap)); + + ShaderInfo shaderInfo = specState.PipelineState.HasValue + ? 
new ShaderInfo(fragmentOutputMap, specState.PipelineState.Value, fromCache: true) + : new ShaderInfo(fragmentOutputMap, fromCache: true); + + IProgram hostProgram; + + if (context.Capabilities.Api == TargetApi.Vulkan) + { + ShaderSource[] shaderSources = ShaderBinarySerializer.Unpack(shaders, hostCode, isCompute); + + hostProgram = context.Renderer.CreateProgram(shaderSources, shaderInfo); + } + else + { + hostProgram = context.Renderer.LoadProgramBinary(hostCode, hasFragmentShader, shaderInfo); + } CachedShaderProgram program = new CachedShaderProgram(hostProgram, specState, shaders); - loader.QueueHostProgram(program, hostProgram, programIndex, isCompute); + loader.QueueHostProgram(program, hostCode, programIndex, isCompute); } else { @@ -371,7 +402,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache if (!loadHostCache) { - loader.QueueGuestProgram(shaders, specState, programIndex, isCompute); + loader.QueueGuestProgram(guestShaders, specState, programIndex, isCompute); } loader.CheckCompilation(); @@ -393,9 +424,15 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// GPU context /// Host TOC file stream, intialized if needed /// Host data file stream, initialized if needed + /// Guest shader code for each active stage /// Index of the program on the cache /// Host binary code, or null if not found - private byte[] ReadHostCode(GpuContext context, ref Stream tocFileStream, ref Stream dataFileStream, int programIndex) + private (byte[], CachedShaderStage[]) ReadHostCode( + GpuContext context, + ref Stream tocFileStream, + ref Stream dataFileStream, + GuestCodeAndCbData?[] guestShaders, + int programIndex) { if (tocFileStream == null && dataFileStream == null) { @@ -404,7 +441,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache if (!File.Exists(tocFilePath) || !File.Exists(dataFilePath)) { - return null; + return (null, null); } tocFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: false); @@ -414,7 +451,7 @@ namespace 
Ryujinx.Graphics.Gpu.Shader.DiskCache int offset = Unsafe.SizeOf() + programIndex * Unsafe.SizeOf(); if (offset + Unsafe.SizeOf() > tocFileStream.Length) { - return null; + return (null, null); } if ((ulong)offset >= (ulong)dataFileStream.Length) @@ -436,11 +473,33 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache dataFileStream.Seek((long)offsetAndSize.Offset, SeekOrigin.Begin); - byte[] hostCode = new byte[offsetAndSize.Size]; + byte[] hostCode = new byte[offsetAndSize.UncompressedSize]; BinarySerializer.ReadCompressed(dataFileStream, hostCode); - return hostCode; + CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length]; + BinarySerializer dataReader = new BinarySerializer(dataFileStream); + + dataFileStream.Seek((long)(offsetAndSize.Offset + offsetAndSize.CompressedSize), SeekOrigin.Begin); + + dataReader.BeginCompression(); + + for (int index = 0; index < guestShaders.Length; index++) + { + if (!guestShaders[index].HasValue) + { + continue; + } + + GuestCodeAndCbData guestShader = guestShaders[index].Value; + ShaderProgramInfo info = index != 0 || guestShaders.Length == 1 ? ReadShaderProgramInfo(ref dataReader) : null; + + shaders[index] = new CachedShaderStage(info, guestShader.Code, guestShader.Cb1Data); + } + + dataReader.EndCompression(); + + return (hostCode, shaders); } /// @@ -519,8 +578,6 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache stageEntry.GuestCodeIndex = _guestStorage.AddShader(shader.Code, shader.Cb1Data); dataWriter.Write(ref stageEntry); - - WriteShaderProgramInfo(ref dataWriter, shader.Info); } program.SpecializationState.Write(ref dataWriter); @@ -537,7 +594,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache return; } - WriteHostCode(context, hostCode, -1, streams); + WriteHostCode(context, hostCode, program.Shaders, streams); } /// @@ -574,29 +631,14 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache dataFileStream.SetLength(0); } - /// - /// Adds a host binary shader to the host cache. 
- /// - /// - /// This only modifies the host cache. The shader must already exist in the other caches. - /// This method should only be used for rebuilding the host cache after a clear. - /// - /// GPU context - /// Host binary code - /// Index of the program in the cache - public void AddHostShader(GpuContext context, ReadOnlySpan hostCode, int programIndex) - { - WriteHostCode(context, hostCode, programIndex); - } - /// /// Writes the host binary code on the host cache. /// /// GPU context /// Host binary code - /// Index of the program in the cache + /// Shader stages to be added to the host cache /// Output streams to use - private void WriteHostCode(GpuContext context, ReadOnlySpan hostCode, int programIndex, DiskCacheOutputStreams streams = null) + private void WriteHostCode(GpuContext context, ReadOnlySpan hostCode, CachedShaderStage[] shaders, DiskCacheOutputStreams streams = null) { var tocFileStream = streams != null ? streams.HostTocFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true); var dataFileStream = streams != null ? 
streams.HostDataFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true); @@ -607,26 +649,36 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache CreateToc(tocFileStream, ref header, TochMagic, 0); } - if (programIndex == -1) - { - tocFileStream.Seek(0, SeekOrigin.End); - } - else - { - tocFileStream.Seek(Unsafe.SizeOf() + (programIndex * Unsafe.SizeOf()), SeekOrigin.Begin); - } - + tocFileStream.Seek(0, SeekOrigin.End); dataFileStream.Seek(0, SeekOrigin.End); BinarySerializer tocWriter = new BinarySerializer(tocFileStream); + BinarySerializer dataWriter = new BinarySerializer(dataFileStream); OffsetAndSize offsetAndSize = new OffsetAndSize(); offsetAndSize.Offset = (ulong)dataFileStream.Position; - offsetAndSize.Size = (uint)hostCode.Length; - tocWriter.Write(ref offsetAndSize); + offsetAndSize.UncompressedSize = (uint)hostCode.Length; + + long dataStartPosition = dataFileStream.Position; BinarySerializer.WriteCompressed(dataFileStream, hostCode, DiskCacheCommon.GetCompressionAlgorithm()); + offsetAndSize.CompressedSize = (uint)(dataFileStream.Position - dataStartPosition); + + tocWriter.Write(ref offsetAndSize); + + dataWriter.BeginCompression(DiskCacheCommon.GetCompressionAlgorithm()); + + for (int index = 0; index < shaders.Length; index++) + { + if (shaders[index] != null) + { + WriteShaderProgramInfo(ref dataWriter, shaders[index].Info); + } + } + + dataWriter.EndCompression(); + if (streams == null) { tocFileStream.Dispose(); diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs new file mode 100644 index 000000000..b1ac819e6 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/GuestCodeAndCbData.cs @@ -0,0 +1,31 @@ +using Ryujinx.Graphics.Shader; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + /// + /// Guest shader code and constant buffer data accessed by the shader. 
+ /// + struct GuestCodeAndCbData + { + /// + /// Maxwell binary shader code. + /// + public byte[] Code { get; } + + /// + /// Constant buffer 1 data accessed by the shader. + /// + public byte[] Cb1Data { get; } + + /// + /// Creates a new instance of the guest shader code and constant buffer data. + /// + /// Maxwell binary shader code + /// Constant buffer 1 data accessed by the shader + public GuestCodeAndCbData(byte[] code, byte[] cb1Data) + { + Code = code; + Cb1Data = cb1Data; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs index af7579d5d..0b56419e6 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs @@ -45,9 +45,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache public readonly CachedShaderProgram CachedProgram; /// - /// Host program. + /// Optional binary code. If not null, it is used instead of the backend host binary. /// - public readonly IProgram HostProgram; + public readonly byte[] BinaryCode; /// /// Program index. @@ -68,19 +68,18 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// Creates a new program validation entry. /// /// Cached shader program - /// Host program /// Program index /// Indicates if the program is a compute shader /// Indicates if the program is a host binary shader public ProgramEntry( CachedShaderProgram cachedProgram, - IProgram hostProgram, + byte[] binaryCode, int programIndex, bool isCompute, bool isBinary) { CachedProgram = cachedProgram; - HostProgram = hostProgram; + BinaryCode = binaryCode; ProgramIndex = programIndex; IsCompute = isCompute; IsBinary = isBinary; @@ -146,9 +145,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private struct AsyncProgramTranslation { /// - /// Cached shader stages. + /// Guest code for each active stage. 
/// - public readonly CachedShaderStage[] Shaders; + public readonly GuestCodeAndCbData?[] GuestShaders; /// /// Specialization state. @@ -168,17 +167,17 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// /// Creates a new program translation entry. /// - /// Cached shader stages + /// Guest code for each active stage /// Specialization state /// Program index /// Indicates if the program is a compute shader public AsyncProgramTranslation( - CachedShaderStage[] shaders, + GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute) { - Shaders = shaders; + GuestShaders = guestShaders; SpecializationState = specState; ProgramIndex = programIndex; IsCompute = isCompute; @@ -188,7 +187,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private readonly Queue _validationQueue; private readonly ConcurrentQueue _compilationQueue; private readonly BlockingCollection _asyncTranslationQueue; - private readonly SortedList _programList; + private readonly SortedList _programList; private int _backendParallelCompileThreads; private int _compiledCount; @@ -220,7 +219,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache _validationQueue = new Queue(); _compilationQueue = new ConcurrentQueue(); _asyncTranslationQueue = new BlockingCollection(ThreadCount); - _programList = new SortedList(); + _programList = new SortedList(); _backendParallelCompileThreads = Math.Min(Environment.ProcessorCount, 8); // Must be kept in sync with the backend code. } @@ -235,7 +234,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache { workThreads[index] = new Thread(ProcessAsyncQueue) { - Name = $"Gpu.AsyncTranslationThread.{index}" + Name = $"GPU.AsyncTranslationThread.{index}" }; } @@ -287,7 +286,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache CheckCompilationBlocking(); - if (_needsHostRegen) + if (_needsHostRegen && Active) { // Rebuild both shared and host cache files. 
// Rebuilding shared is required because the shader information returned by the translator @@ -310,8 +309,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache break; } - CachedShaderProgram program = kv.Value; - _hostStorage.AddShader(_context, program, program.HostProgram.GetBinary(), streams); + (CachedShaderProgram program, byte[] binaryCode) = kv.Value; + _hostStorage.AddShader(_context, program, binaryCode, streams); } Logger.Info?.Print(LogClass.Gpu, $"Rebuilt {_programList.Count} shaders successfully."); @@ -342,24 +341,31 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// Enqueues a host program for compilation. /// /// Cached program - /// Host program to be compiled + /// Host binary code /// Program index /// Indicates if the program is a compute shader - public void QueueHostProgram(CachedShaderProgram cachedProgram, IProgram hostProgram, int programIndex, bool isCompute) + public void QueueHostProgram(CachedShaderProgram cachedProgram, byte[] binaryCode, int programIndex, bool isCompute) { - EnqueueForValidation(new ProgramEntry(cachedProgram, hostProgram, programIndex, isCompute, isBinary: true)); + EnqueueForValidation(new ProgramEntry(cachedProgram, binaryCode, programIndex, isCompute, isBinary: true)); } /// /// Enqueues a guest program for compilation. 
/// - /// Cached shader stages + /// Guest code for each active stage /// Specialization state /// Program index /// Indicates if the program is a compute shader - public void QueueGuestProgram(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex, bool isCompute) + public void QueueGuestProgram(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute) { - _asyncTranslationQueue.Add(new AsyncProgramTranslation(shaders, specState, programIndex, isCompute)); + try + { + AsyncProgramTranslation asyncTranslation = new AsyncProgramTranslation(guestShaders, specState, programIndex, isCompute); + _asyncTranslationQueue.Add(asyncTranslation, _cancellationToken); + } + catch (OperationCanceledException) + { + } } /// @@ -374,7 +380,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache // If not yet compiled, do nothing. This avoids blocking to wait for shader compilation. while (_validationQueue.TryPeek(out ProgramEntry entry)) { - ProgramLinkStatus result = entry.HostProgram.CheckProgramLink(false); + ProgramLinkStatus result = entry.CachedProgram.HostProgram.CheckProgramLink(false); if (result != ProgramLinkStatus.Incomplete) { @@ -398,7 +404,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache while (_validationQueue.TryDequeue(out ProgramEntry entry) && Active) { - ProcessCompiledProgram(ref entry, entry.HostProgram.CheckProgramLink(true), asyncCompile: false); + ProcessCompiledProgram(ref entry, entry.CachedProgram.HostProgram.CheckProgramLink(true), asyncCompile: false); } } @@ -427,7 +433,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache _needsHostRegen = true; } - _programList.Add(entry.ProgramIndex, entry.CachedProgram); + _programList.Add(entry.ProgramIndex, (entry.CachedProgram, entry.BinaryCode)); SignalCompiled(); } else if (entry.IsBinary) @@ -436,13 +442,25 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache // we still have a chance to recompile from the guest binary. 
CachedShaderProgram program = entry.CachedProgram; + GuestCodeAndCbData?[] guestShaders = new GuestCodeAndCbData?[program.Shaders.Length]; + + for (int index = 0; index < program.Shaders.Length; index++) + { + CachedShaderStage shader = program.Shaders[index]; + + if (shader != null) + { + guestShaders[index] = new GuestCodeAndCbData(shader.Code, shader.Cb1Data); + } + } + if (asyncCompile) { - QueueGuestProgram(program.Shaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute); + QueueGuestProgram(guestShaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute); } else { - RecompileFromGuestCode(program.Shaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute); + RecompileFromGuestCode(guestShaders, program.SpecializationState, entry.ProgramIndex, entry.IsCompute); ProcessCompilationQueue(); } } @@ -476,10 +494,16 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache } } - IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources, new ShaderInfo(fragmentOutputMap)); + ShaderInfo shaderInfo = compilation.SpecializationState.PipelineState.HasValue + ? new ShaderInfo(fragmentOutputMap, compilation.SpecializationState.PipelineState.Value, fromCache: true) + : new ShaderInfo(fragmentOutputMap, fromCache: true); + + IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources, shaderInfo); CachedShaderProgram program = new CachedShaderProgram(hostProgram, compilation.SpecializationState, compilation.Shaders); - EnqueueForValidation(new ProgramEntry(program, hostProgram, compilation.ProgramIndex, compilation.IsCompute, isBinary: false)); + byte[] binaryCode = _context.Capabilities.Api == TargetApi.Vulkan ? 
ShaderBinarySerializer.Pack(shaderSources) : hostProgram.GetBinary(); + + EnqueueForValidation(new ProgramEntry(program, binaryCode, compilation.ProgramIndex, compilation.IsCompute, isBinary: false)); } } @@ -496,7 +520,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache // Submitting more seems to cause NVIDIA OpenGL driver to crash. if (_validationQueue.Count >= _backendParallelCompileThreads && _validationQueue.TryDequeue(out ProgramEntry entry)) { - ProcessCompiledProgram(ref entry, entry.HostProgram.CheckProgramLink(true), asyncCompile: false); + ProcessCompiledProgram(ref entry, entry.CachedProgram.HostProgram.CheckProgramLink(true), asyncCompile: false); } } @@ -513,7 +537,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache foreach (AsyncProgramTranslation asyncCompilation in _asyncTranslationQueue.GetConsumingEnumerable(ct)) { RecompileFromGuestCode( - asyncCompilation.Shaders, + asyncCompilation.GuestShaders, asyncCompilation.SpecializationState, asyncCompilation.ProgramIndex, asyncCompilation.IsCompute); @@ -527,21 +551,21 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// /// Recompiles a program from guest code. 
/// - /// Shader stages + /// Guest code for each active stage /// Specialization state /// Program index /// Indicates if the program is a compute shader - private void RecompileFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex, bool isCompute) + private void RecompileFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute) { try { if (isCompute) { - RecompileComputeFromGuestCode(shaders, specState, programIndex); + RecompileComputeFromGuestCode(guestShaders, specState, programIndex); } else { - RecompileGraphicsFromGuestCode(shaders, specState, programIndex); + RecompileGraphicsFromGuestCode(guestShaders, specState, programIndex); } } catch (DiskCacheLoadException diskCacheLoadException) @@ -556,41 +580,47 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// /// Recompiles a graphics program from guest code. /// - /// Shader stages + /// Guest code for each active stage /// Specialization state /// Program index - private void RecompileGraphicsFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex) + private void RecompileGraphicsFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex) { - ShaderSpecializationState newSpecState = new ShaderSpecializationState(specState.GraphicsState, specState.TransformFeedbackDescriptors); + ShaderSpecializationState newSpecState = new ShaderSpecializationState( + ref specState.GraphicsState, + specState.PipelineState, + specState.TransformFeedbackDescriptors); + ResourceCounts counts = new ResourceCounts(); TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1]; TranslatorContext nextStage = null; + TargetApi api = _context.Capabilities.Api; + for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--) { - CachedShaderStage shader = shaders[stageIndex + 1]; - - if (shader 
!= null) + if (guestShaders[stageIndex + 1].HasValue) { + GuestCodeAndCbData shader = guestShaders[stageIndex + 1].Value; + byte[] guestCode = shader.Code; byte[] cb1Data = shader.Cb1Data; DiskCacheGpuAccessor gpuAccessor = new DiskCacheGpuAccessor(_context, guestCode, cb1Data, specState, newSpecState, counts, stageIndex); - TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, DefaultFlags, 0); + TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags, 0); if (nextStage != null) { currentStage.SetNextStage(nextStage); } - if (stageIndex == 0 && shaders[0] != null) + if (stageIndex == 0 && guestShaders[0].HasValue) { - byte[] guestCodeA = shaders[0].Code; - byte[] cb1DataA = shaders[0].Cb1Data; + byte[] guestCodeA = guestShaders[0].Value.Code; + byte[] cb1DataA = guestShaders[0].Value.Cb1Data; DiskCacheGpuAccessor gpuAccessorA = new DiskCacheGpuAccessor(_context, guestCodeA, cb1DataA, specState, newSpecState, counts, 0); - translatorContexts[0] = DecodeGraphicsShader(gpuAccessorA, DefaultFlags | TranslationFlags.VertexA, 0); + translatorContexts[0] = DecodeGraphicsShader(gpuAccessorA, api, DefaultFlags | TranslationFlags.VertexA, 0); } translatorContexts[stageIndex + 1] = currentStage; @@ -598,6 +628,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache } } + CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length]; List translatedStages = new List(); for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) @@ -608,15 +639,15 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache { ShaderProgram program; - byte[] guestCode = shaders[stageIndex + 1].Code; - byte[] cb1Data = shaders[stageIndex + 1].Cb1Data; + byte[] guestCode = guestShaders[stageIndex + 1].Value.Code; + byte[] cb1Data = guestShaders[stageIndex + 1].Value.Cb1Data; - if (stageIndex == 0 && shaders[0] != null) + if (stageIndex == 0 && guestShaders[0].HasValue) { program = currentStage.Translate(translatorContexts[0]); - byte[] 
guestCodeA = shaders[0].Code; - byte[] cb1DataA = shaders[0].Cb1Data; + byte[] guestCodeA = guestShaders[0].Value.Code; + byte[] cb1DataA = guestShaders[0].Value.Cb1Data; shaders[0] = new CachedShaderStage(null, guestCodeA, cb1DataA); shaders[1] = new CachedShaderStage(program.Info, guestCode, cb1Data); @@ -641,21 +672,21 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// /// Recompiles a compute program from guest code. /// - /// Shader stages + /// Guest code for each active stage /// Specialization state /// Program index - private void RecompileComputeFromGuestCode(CachedShaderStage[] shaders, ShaderSpecializationState specState, int programIndex) + private void RecompileComputeFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex) { - CachedShaderStage shader = shaders[0]; + GuestCodeAndCbData shader = guestShaders[0].Value; ResourceCounts counts = new ResourceCounts(); - ShaderSpecializationState newSpecState = new ShaderSpecializationState(specState.ComputeState); + ShaderSpecializationState newSpecState = new ShaderSpecializationState(ref specState.ComputeState); DiskCacheGpuAccessor gpuAccessor = new DiskCacheGpuAccessor(_context, shader.Code, shader.Cb1Data, specState, newSpecState, counts, 0); - TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, 0); + TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, _context.Capabilities.Api, 0); ShaderProgram program = translatorContext.Translate(); - shaders[0] = new CachedShaderStage(program.Info, shader.Code, shader.Cb1Data); + CachedShaderStage[] shaders = new[] { new CachedShaderStage(program.Info, shader.Code, shader.Cb1Data) }; _compilationQueue.Enqueue(new ProgramCompilation(new[] { program }, shaders, newSpecState, programIndex, isCompute: true)); } diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs new file mode 100644 index 
000000000..11e54220f --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/ShaderBinarySerializer.cs @@ -0,0 +1,49 @@ +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Shader.Translation; +using System.Collections.Generic; +using System.IO; + +namespace Ryujinx.Graphics.Gpu.Shader.DiskCache +{ + static class ShaderBinarySerializer + { + public static byte[] Pack(ShaderSource[] sources) + { + using MemoryStream output = new MemoryStream(); + using BinaryWriter writer = new BinaryWriter(output); + + for (int i = 0; i < sources.Length; i++) + { + writer.Write(sources[i].BinaryCode.Length); + writer.Write(sources[i].BinaryCode); + } + + return output.ToArray(); + } + + public static ShaderSource[] Unpack(CachedShaderStage[] stages, byte[] code, bool compute) + { + using MemoryStream input = new MemoryStream(code); + using BinaryReader reader = new BinaryReader(input); + + List output = new List(); + + for (int i = compute ? 0 : 1; i < stages.Length; i++) + { + CachedShaderStage stage = stages[i]; + + if (stage == null) + { + continue; + } + + int binaryCodeLength = reader.ReadInt32(); + byte[] binaryCode = reader.ReadBytes(binaryCodeLength); + + output.Add(new ShaderSource(binaryCode, ShaderCache.GetBindings(stage.Info), stage.Info.Stage, TargetLanguage.Spirv)); + } + + return output.ToArray(); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs index 5cd966af7..a53be4a7f 100644 --- a/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs +++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs @@ -1,6 +1,8 @@ using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Image; using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; using System; using System.Runtime.InteropServices; @@ -15,6 +17,7 @@ namespace Ryujinx.Graphics.Gpu.Shader private readonly GpuAccessorState _state; private readonly int _stageIndex; private readonly bool _compute; + 
private readonly bool _isVulkan; /// /// Creates a new instance of the GPU state accessor for graphics shader translation. @@ -23,8 +26,13 @@ namespace Ryujinx.Graphics.Gpu.Shader /// GPU channel /// Current GPU state /// Graphics shader stage index (0 = Vertex, 4 = Fragment) - public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state, int stageIndex) : base(context) + public GpuAccessor( + GpuContext context, + GpuChannel channel, + GpuAccessorState state, + int stageIndex) : base(context, state.ResourceCounts, stageIndex) { + _isVulkan = context.Capabilities.Api == TargetApi.Vulkan; _channel = channel; _state = state; _stageIndex = stageIndex; @@ -36,7 +44,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// GPU context /// GPU channel /// Current GPU state - public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state) : base(context) + public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state) : base(context, state.ResourceCounts, 0) { _channel = channel; _state = state; @@ -67,27 +75,36 @@ namespace Ryujinx.Graphics.Gpu.Shader } /// - public int QueryBindingConstantBuffer(int index) + public AlphaTestOp QueryAlphaTestCompare() { - return _state.ResourceCounts.UniformBuffersCount++; + if (!_isVulkan || !_state.GraphicsState.AlphaTestEnable) + { + return AlphaTestOp.Always; + } + + return _state.GraphicsState.AlphaTestCompare switch + { + CompareOp.Never or CompareOp.NeverGl => AlphaTestOp.Never, + CompareOp.Less or CompareOp.LessGl => AlphaTestOp.Less, + CompareOp.Equal or CompareOp.EqualGl => AlphaTestOp.Equal, + CompareOp.LessOrEqual or CompareOp.LessOrEqualGl => AlphaTestOp.LessOrEqual, + CompareOp.Greater or CompareOp.GreaterGl => AlphaTestOp.Greater, + CompareOp.NotEqual or CompareOp.NotEqualGl => AlphaTestOp.NotEqual, + CompareOp.GreaterOrEqual or CompareOp.GreaterOrEqualGl => AlphaTestOp.GreaterOrEqual, + _ => AlphaTestOp.Always + }; } /// - public int QueryBindingStorageBuffer(int index) + 
public float QueryAlphaTestReference() { - return _state.ResourceCounts.StorageBuffersCount++; + return _state.GraphicsState.AlphaTestReference; } /// - public int QueryBindingTexture(int index) + public AttributeType QueryAttributeType(int location) { - return _state.ResourceCounts.TexturesCount++; - } - - /// - public int QueryBindingImage(int index) - { - return _state.ResourceCounts.ImagesCount++; + return _state.GraphicsState.AttributeTypes[location]; } /// @@ -123,6 +140,18 @@ namespace Ryujinx.Graphics.Gpu.Shader return ConvertToInputTopology(_state.GraphicsState.Topology, _state.GraphicsState.TessellationMode); } + /// + public bool QueryProgramPointSize() + { + return _state.GraphicsState.ProgramPointSizeEnable; + } + + /// + public float QueryPointSize() + { + return _state.GraphicsState.PointSize; + } + /// public bool QueryTessCw() { @@ -192,6 +221,12 @@ namespace Ryujinx.Graphics.Gpu.Shader } } + /// + public bool QueryTransformDepthMinusOneToOne() + { + return _state.GraphicsState.DepthMode; + } + /// public bool QueryTransformFeedbackEnabled() { diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs index 5f9dd5880..77b613de7 100644 --- a/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs +++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs @@ -1,7 +1,9 @@ +using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Engine.Threed; using Ryujinx.Graphics.Gpu.Image; using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; namespace Ryujinx.Graphics.Gpu.Shader { @@ -11,74 +13,139 @@ namespace Ryujinx.Graphics.Gpu.Shader class GpuAccessorBase { private readonly GpuContext _context; + private readonly ResourceCounts _resourceCounts; + private readonly int _stageIndex; /// /// Creates a new GPU accessor. 
/// /// GPU context - public GpuAccessorBase(GpuContext context) + public GpuAccessorBase(GpuContext context, ResourceCounts resourceCounts, int stageIndex) { _context = context; + _resourceCounts = resourceCounts; + _stageIndex = stageIndex; } - /// - /// Queries host about the presence of the FrontFacing built-in variable bug. - /// - /// True if the bug is present on the host device used, false otherwise + /// + public int QueryBindingConstantBuffer(int index) + { + if (_context.Capabilities.Api == TargetApi.Vulkan) + { + return 1 + GetBindingFromIndex(index, _context.Capabilities.MaximumUniformBuffersPerStage, "Uniform buffer"); + } + else + { + return _resourceCounts.UniformBuffersCount++; + } + } + + /// + public int QueryBindingStorageBuffer(int index) + { + if (_context.Capabilities.Api == TargetApi.Vulkan) + { + return GetBindingFromIndex(index, _context.Capabilities.MaximumStorageBuffersPerStage, "Storage buffer"); + } + else + { + return _resourceCounts.StorageBuffersCount++; + } + } + + /// + public int QueryBindingTexture(int index, bool isBuffer) + { + if (_context.Capabilities.Api == TargetApi.Vulkan) + { + if (isBuffer) + { + index += (int)_context.Capabilities.MaximumTexturesPerStage; + } + + return GetBindingFromIndex(index, _context.Capabilities.MaximumTexturesPerStage * 2, "Texture"); + } + else + { + return _resourceCounts.TexturesCount++; + } + } + + /// + public int QueryBindingImage(int index, bool isBuffer) + { + if (_context.Capabilities.Api == TargetApi.Vulkan) + { + if (isBuffer) + { + index += (int)_context.Capabilities.MaximumImagesPerStage; + } + + return GetBindingFromIndex(index, _context.Capabilities.MaximumImagesPerStage * 2, "Image"); + } + else + { + return _resourceCounts.ImagesCount++; + } + } + + private int GetBindingFromIndex(int index, uint maxPerStage, string resourceName) + { + if ((uint)index >= maxPerStage) + { + Logger.Error?.Print(LogClass.Gpu, $"{resourceName} index {index} exceeds per stage limit of 
{maxPerStage}."); + } + + return GetStageIndex() * (int)maxPerStage + index; + } + + private int GetStageIndex() + { + // This is just a simple remapping to ensure that most frequently used shader stages + // have the lowest binding numbers. + // This is useful because if we need to run on a system with a low limit on the bindings, + // then we can still get most games working as the most common shaders will have low binding numbers. + return _stageIndex switch + { + 4 => 1, // Fragment + 3 => 2, // Geometry + 1 => 3, // Tessellation control + 2 => 4, // Tessellation evaluation + _ => 0 // Vertex/Compute + }; + } + + /// public bool QueryHostHasFrontFacingBug() => _context.Capabilities.HasFrontFacingBug; - /// - /// Queries host about the presence of the vector indexing bug. - /// - /// True if the bug is present on the host device used, false otherwise + /// public bool QueryHostHasVectorIndexingBug() => _context.Capabilities.HasVectorIndexingBug; - /// - /// Queries host storage buffer alignment required. - /// - /// Host storage buffer alignment in bytes + /// public int QueryHostStorageBufferOffsetAlignment() => _context.Capabilities.StorageBufferOffsetAlignment; - /// - /// Queries host support for texture formats with BGRA component order (such as BGRA8). - /// - /// True if BGRA formats are supported, false otherwise + /// public bool QueryHostSupportsBgraFormat() => _context.Capabilities.SupportsBgraFormat; - /// - /// Queries host support for fragment shader ordering critical sections on the shader code. - /// - /// True if fragment shader interlock is supported, false otherwise + /// public bool QueryHostSupportsFragmentShaderInterlock() => _context.Capabilities.SupportsFragmentShaderInterlock; - /// - /// Queries host support for fragment shader ordering scoped critical sections on the shader code. 
- /// - /// True if fragment shader ordering is supported, false otherwise + /// public bool QueryHostSupportsFragmentShaderOrderingIntel() => _context.Capabilities.SupportsFragmentShaderOrderingIntel; - /// - /// Queries host support for readable images without a explicit format declaration on the shader. - /// - /// True if formatted image load is supported, false otherwise + /// + public bool QueryHostSupportsGeometryShaderPassthrough() => _context.Capabilities.SupportsGeometryShaderPassthrough; + + /// public bool QueryHostSupportsImageLoadFormatted() => _context.Capabilities.SupportsImageLoadFormatted; - /// - /// Queries host GPU non-constant texture offset support. - /// - /// True if the GPU and driver supports non-constant texture offsets, false otherwise + /// public bool QueryHostSupportsNonConstantTextureOffset() => _context.Capabilities.SupportsNonConstantTextureOffset; - /// - /// Queries host GPU shader ballot support. - /// - /// True if the GPU and driver supports shader ballot, false otherwise + /// public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot; - /// - /// Queries host GPU texture shadow LOD support. 
- /// - /// True if the GPU and driver supports texture shadow LOD, false otherwise + /// public bool QueryHostSupportsTextureShadowLod() => _context.Capabilities.SupportsTextureShadowLod; /// diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs b/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs index 92ec117f3..f52e040f3 100644 --- a/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs +++ b/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs @@ -1,5 +1,7 @@ +using Ryujinx.Common.Memory; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Engine.Threed; +using Ryujinx.Graphics.Shader; namespace Ryujinx.Graphics.Gpu.Shader { @@ -30,6 +32,41 @@ namespace Ryujinx.Graphics.Gpu.Shader /// public readonly bool ViewportTransformDisable; + /// + /// Depth mode zero to one or minus one to one. + /// + public readonly bool DepthMode; + + /// + /// Indicates if the point size is set on the shader or is fixed. + /// + public readonly bool ProgramPointSizeEnable; + + /// + /// Point size if not set from shader. + /// + public readonly float PointSize; + + /// + /// Indicates whenever alpha test is enabled. + /// + public readonly bool AlphaTestEnable; + + /// + /// When alpha test is enabled, indicates the comparison that decides if the fragment is discarded. + /// + public readonly CompareOp AlphaTestCompare; + + /// + /// When alpha test is enabled, indicates the value to compare with the fragment output alpha. + /// + public readonly float AlphaTestReference; + + /// + /// Type of the vertex attributes consumed by the shader. + /// + public Array32 AttributeTypes; + /// /// Creates a new GPU graphics state. 
/// @@ -37,12 +74,37 @@ namespace Ryujinx.Graphics.Gpu.Shader /// Primitive topology /// Tessellation mode /// Indicates whenever the viewport transform is disabled - public GpuChannelGraphicsState(bool earlyZForce, PrimitiveTopology topology, TessMode tessellationMode, bool viewportTransformDisable) + /// Depth mode zero to one or minus one to one + /// Indicates if the point size is set on the shader or is fixed + /// Point size if not set from shader + /// Indicates whenever alpha test is enabled + /// When alpha test is enabled, indicates the comparison that decides if the fragment is discarded + /// When alpha test is enabled, indicates the value to compare with the fragment output alpha + /// Type of the vertex attributes consumed by the shader + public GpuChannelGraphicsState( + bool earlyZForce, + PrimitiveTopology topology, + TessMode tessellationMode, + bool viewportTransformDisable, + bool depthMode, + bool programPointSizeEnable, + float pointSize, + bool alphaTestEnable, + CompareOp alphaTestCompare, + float alphaTestReference, + ref Array32 attributeTypes) { EarlyZForce = earlyZForce; Topology = topology; TessellationMode = tessellationMode; ViewportTransformDisable = viewportTransformDisable; + DepthMode = depthMode; + ProgramPointSizeEnable = programPointSizeEnable; + PointSize = pointSize; + AlphaTestEnable = alphaTestEnable; + AlphaTestCompare = alphaTestCompare; + AlphaTestReference = alphaTestReference; + AttributeTypes = attributeTypes; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index df4b9d128..8834b5a5f 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -1,6 +1,8 @@ using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Engine.Threed; +using Ryujinx.Graphics.Gpu.Engine.Types; +using Ryujinx.Graphics.Gpu.Image; using Ryujinx.Graphics.Gpu.Memory; using 
Ryujinx.Graphics.Gpu.Shader.Cache; using Ryujinx.Graphics.Gpu.Shader.DiskCache; @@ -8,6 +10,7 @@ using Ryujinx.Graphics.Shader; using Ryujinx.Graphics.Shader.Translation; using System; using System.Collections.Generic; +using System.Linq; using System.Threading; namespace Ryujinx.Graphics.Gpu.Shader @@ -59,11 +62,13 @@ namespace Ryujinx.Graphics.Gpu.Shader { public readonly CachedShaderProgram CachedProgram; public readonly IProgram HostProgram; + public readonly byte[] BinaryCode; - public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram) + public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram, byte[] binaryCode) { CachedProgram = cachedProgram; HostProgram = hostProgram; + BinaryCode = binaryCode; } } @@ -123,7 +128,7 @@ namespace Ryujinx.Graphics.Gpu.Shader { if (result == ProgramLinkStatus.Success) { - _cacheWriter.AddShader(programToSave.CachedProgram, programToSave.HostProgram.GetBinary()); + _cacheWriter.AddShader(programToSave.CachedProgram, programToSave.BinaryCode ?? programToSave.HostProgram.GetBinary()); } _programsToSaveQueue.Dequeue(); @@ -143,7 +148,9 @@ namespace Ryujinx.Graphics.Gpu.Shader { if (_diskCacheHostStorage.CacheEnabled) { - if (!_diskCacheHostStorage.CacheExists()) + // Migration disabled as Vulkan added a lot of new state, + // most migrated shaders would be unused due to the state not matching. + /* if (!_diskCacheHostStorage.CacheExists()) { // If we don't have a shader cache on the new format, try to perform migration from the old shader cache. 
Logger.Info?.Print(LogClass.Gpu, "No shader cache found, trying to migrate from legacy shader cache..."); @@ -151,7 +158,7 @@ namespace Ryujinx.Graphics.Gpu.Shader int migrationCount = Migration.MigrateFromLegacyCache(_context, _diskCacheHostStorage); Logger.Info?.Print(LogClass.Gpu, $"Migrated {migrationCount} shaders."); - } + } */ ParallelDiskCacheLoader loader = new ParallelDiskCacheLoader( _context, @@ -210,25 +217,67 @@ namespace Ryujinx.Graphics.Gpu.Shader return cpShader; } - ShaderSpecializationState specState = new ShaderSpecializationState(computeState); + ShaderSpecializationState specState = new ShaderSpecializationState(ref computeState); GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, computeState, default, specState); GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState); - TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, gpuVa); + TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, _context.Capabilities.Api, gpuVa); TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode); - IProgram hostProgram = _context.Renderer.CreateProgram(new ShaderSource[] { CreateShaderSource(translatedShader.Program) }, new ShaderInfo(-1)); + ShaderSource[] shaderSourcesArray = new ShaderSource[] { CreateShaderSource(translatedShader.Program) }; + + IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, new ShaderInfo(-1)); cpShader = new CachedShaderProgram(hostProgram, specState, translatedShader.Shader); _computeShaderCache.Add(cpShader); - EnqueueProgramToSave(new ProgramToSave(cpShader, hostProgram)); + EnqueueProgramToSave(cpShader, hostProgram, shaderSourcesArray); _cpPrograms[gpuVa] = cpShader; return cpShader; } + private void UpdatePipelineInfo( + ref ThreedClassState state, + ref ProgramPipelineState pipeline, + GpuChannelGraphicsState graphicsState, + GpuChannel channel) + { + 
channel.TextureManager.UpdateRenderTargets(); + + var rtControl = state.RtControl; + var msaaMode = state.RtMsaaMode; + + pipeline.SamplesCount = msaaMode.SamplesInX() * msaaMode.SamplesInY(); + + int count = rtControl.UnpackCount(); + + for (int index = 0; index < Constants.TotalRenderTargets; index++) + { + int rtIndex = rtControl.UnpackPermutationIndex(index); + + var colorState = state.RtColorState[rtIndex]; + + if (index >= count || colorState.Format == 0 || colorState.WidthOrStride == 0) + { + pipeline.AttachmentEnable[index] = false; + pipeline.AttachmentFormats[index] = Format.R8G8B8A8Unorm; + } + else + { + pipeline.AttachmentEnable[index] = true; + pipeline.AttachmentFormats[index] = colorState.Format.Convert().Format; + } + } + + pipeline.DepthStencilEnable = state.RtDepthStencilEnable; + pipeline.DepthStencilFormat = pipeline.DepthStencilEnable ? state.RtDepthStencilState.Format.Convert().Format : Format.D24UnormS8Uint; + + pipeline.VertexBufferCount = Constants.TotalVertexBuffers; + pipeline.Topology = graphicsState.Topology; + } + /// /// Gets a graphics shader program from the shader cache. /// This includes all the specified shader stages. @@ -237,6 +286,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// This automatically translates, compiles and adds the code to the cache if not present. 
/// /// GPU state + /// Pipeline state /// GPU channel /// Texture pool state /// 3D engine state @@ -244,6 +294,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// Compiled graphics shader code public CachedShaderProgram GetGraphicsShader( ref ThreedClassState state, + ref ProgramPipelineState pipeline, GpuChannel channel, GpuChannelPoolState poolState, GpuChannelGraphicsState graphicsState, @@ -262,7 +313,9 @@ namespace Ryujinx.Graphics.Gpu.Shader TransformFeedbackDescriptor[] transformFeedbackDescriptors = GetTransformFeedbackDescriptors(ref state); - ShaderSpecializationState specState = new ShaderSpecializationState(graphicsState, transformFeedbackDescriptors); + UpdatePipelineInfo(ref state, ref pipeline, graphicsState, channel); + + ShaderSpecializationState specState = new ShaderSpecializationState(ref graphicsState, ref pipeline, transformFeedbackDescriptors); GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, default, graphicsState, specState, transformFeedbackDescriptors); ReadOnlySpan addressesSpan = addresses.AsSpan(); @@ -270,6 +323,8 @@ namespace Ryujinx.Graphics.Gpu.Shader TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1]; TranslatorContext nextStage = null; + TargetApi api = _context.Capabilities.Api; + for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--) { ulong gpuVa = addressesSpan[stageIndex + 1]; @@ -277,7 +332,7 @@ namespace Ryujinx.Graphics.Gpu.Shader if (gpuVa != 0) { GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState, stageIndex); - TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, DefaultFlags, gpuVa); + TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags, gpuVa); if (nextStage != null) { @@ -286,7 +341,7 @@ namespace Ryujinx.Graphics.Gpu.Shader if (stageIndex == 0 && addresses.VertexA != 0) { - translatorContexts[0] = DecodeGraphicsShader(gpuAccessor, DefaultFlags | 
TranslationFlags.VertexA, addresses.VertexA); + translatorContexts[0] = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags | TranslationFlags.VertexA, addresses.VertexA); } translatorContexts[stageIndex + 1] = currentStage; @@ -336,13 +391,15 @@ namespace Ryujinx.Graphics.Gpu.Shader } } + ShaderSource[] shaderSourcesArray = shaderSources.ToArray(); + int fragmentOutputMap = shaders[5]?.Info.FragmentOutputMap ?? -1; - IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources.ToArray(), new ShaderInfo(fragmentOutputMap)); + IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, new ShaderInfo(fragmentOutputMap, pipeline)); gpShaders = new CachedShaderProgram(hostProgram, specState, shaders); _graphicsShaderCache.Add(gpShaders); - EnqueueProgramToSave(new ProgramToSave(gpShaders, hostProgram)); + EnqueueProgramToSave(gpShaders, hostProgram, shaderSourcesArray); _gpPrograms[addresses] = gpShaders; return gpShaders; @@ -355,7 +412,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// Shader source public static ShaderSource CreateShaderSource(ShaderProgram program) { - return new ShaderSource(program.Code, program.BinaryCode, program.Info.Stage, program.Language); + return new ShaderSource(program.Code, program.BinaryCode, GetBindings(program.Info), program.Info.Stage, program.Language); } /// @@ -364,11 +421,16 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// This will not do anything if disk shader cache is disabled. /// - /// Program to be saved on disk - private void EnqueueProgramToSave(ProgramToSave programToSave) + /// Cached shader program + /// Host program + /// Source for each shader stage + private void EnqueueProgramToSave(CachedShaderProgram program, IProgram hostProgram, ShaderSource[] sources) { if (_diskCacheHostStorage.CacheEnabled) { + byte[] binaryCode = _context.Capabilities.Api == TargetApi.Vulkan ? 
ShaderBinarySerializer.Pack(sources) : null; + ProgramToSave programToSave = new ProgramToSave(program, hostProgram, binaryCode); + _programsToSaveQueue.Enqueue(programToSave); } } @@ -418,7 +480,7 @@ namespace Ryujinx.Graphics.Gpu.Shader { if (IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa)) { - return cpShader.SpecializationState.MatchesCompute(channel, poolState); + return cpShader.SpecializationState.MatchesCompute(channel, poolState, true); } return false; @@ -454,7 +516,7 @@ namespace Ryujinx.Graphics.Gpu.Shader } } - return gpShaders.SpecializationState.MatchesGraphics(channel, poolState, graphicsState); + return gpShaders.SpecializationState.MatchesGraphics(channel, poolState, graphicsState, true); } /// @@ -480,11 +542,12 @@ namespace Ryujinx.Graphics.Gpu.Shader /// Decode the binary Maxwell shader code to a translator context. /// /// GPU state accessor + /// Graphics API that will be used with the shader /// GPU virtual address of the binary shader code /// The generated translator context - public static TranslatorContext DecodeComputeShader(IGpuAccessor gpuAccessor, ulong gpuVa) + public static TranslatorContext DecodeComputeShader(IGpuAccessor gpuAccessor, TargetApi api, ulong gpuVa) { - var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags | TranslationFlags.Compute); + var options = CreateTranslationOptions(api, DefaultFlags | TranslationFlags.Compute); return Translator.CreateContext(gpuVa, gpuAccessor, options); } @@ -495,12 +558,13 @@ namespace Ryujinx.Graphics.Gpu.Shader /// This will combine the "Vertex A" and "Vertex B" shader stages, if specified, into one shader. 
/// /// GPU state accessor + /// Graphics API that will be used with the shader /// Flags that controls shader translation /// GPU virtual address of the shader code /// The generated translator context - public static TranslatorContext DecodeGraphicsShader(IGpuAccessor gpuAccessor, TranslationFlags flags, ulong gpuVa) + public static TranslatorContext DecodeGraphicsShader(IGpuAccessor gpuAccessor, TargetApi api, TranslationFlags flags, ulong gpuVa) { - var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags); + var options = CreateTranslationOptions(api, flags); return Translator.CreateContext(gpuVa, gpuAccessor, options); } @@ -595,6 +659,29 @@ namespace Ryujinx.Graphics.Gpu.Shader }; } + public static ShaderBindings GetBindings(ShaderProgramInfo info) + { + var uniformBufferBindings = info.CBuffers.Select(x => x.Binding).ToArray(); + var storageBufferBindings = info.SBuffers.Select(x => x.Binding).ToArray(); + var textureBindings = info.Textures.Select(x => x.Binding).ToArray(); + var imageBindings = info.Images.Select(x => x.Binding).ToArray(); + + return new ShaderBindings( + uniformBufferBindings, + storageBufferBindings, + textureBindings, + imageBindings); + } + + private static TranslationOptions CreateTranslationOptions(TargetApi api, TranslationFlags flags) + { + TargetLanguage lang = GraphicsConfig.EnableSpirvCompilationOnVulkan && api == TargetApi.Vulkan + ? TargetLanguage.Spirv + : TargetLanguage.Glsl; + + return new TranslationOptions(lang, api, flags); + } + /// /// Disposes the shader cache, deleting all the cached shaders. /// It's an error to use the shader cache after disposal. 
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs index e3e57d745..43ccd892c 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs @@ -35,7 +35,7 @@ namespace Ryujinx.Graphics.Gpu.Shader { foreach (var entry in _entries) { - if (entry.SpecializationState.MatchesGraphics(channel, poolState, graphicsState)) + if (entry.SpecializationState.MatchesGraphics(channel, poolState, graphicsState, true)) { program = entry; return true; @@ -57,7 +57,7 @@ namespace Ryujinx.Graphics.Gpu.Shader { foreach (var entry in _entries) { - if (entry.SpecializationState.MatchesCompute(channel, poolState)) + if (entry.SpecializationState.MatchesCompute(channel, poolState, true)) { program = entry; return true; diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs index 418c7b1a7..3df9d1199 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs @@ -1,9 +1,15 @@ using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Gpu.Image; +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Shader.DiskCache; using Ryujinx.Graphics.Shader; using System; using System.Collections.Generic; +using System.Linq; using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; namespace Ryujinx.Graphics.Gpu.Shader { @@ -14,6 +20,7 @@ namespace Ryujinx.Graphics.Gpu.Shader private const uint TfbdMagic = (byte)'T' | ((byte)'F' << 8) | ((byte)'B' << 16) | ((byte)'D' << 24); private const uint TexkMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'K' << 24); private const uint TexsMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'S' << 24); + private const uint PgpsMagic = (byte)'P' | ((byte)'G' << 8) | 
((byte)'P' << 16) | ((byte)'S' << 24); /// /// Flags indicating GPU state that is used by the shader. @@ -46,6 +53,11 @@ namespace Ryujinx.Graphics.Gpu.Shader /// public Array5 ConstantBufferUse; + /// + /// Optional pipeline state captured at the time of the shader use. + /// + public ProgramPipelineState? PipelineState; + /// /// Transform feedback buffers active at the time the shader was compiled. /// @@ -158,6 +170,9 @@ namespace Ryujinx.Graphics.Gpu.Shader } private readonly Dictionary> _textureSpecialization; + private KeyValuePair>[] _allTextures; + private Box[][] _textureByBinding; + private Box[][] _imageByBinding; /// /// Creates a new instance of the shader specialization state. @@ -171,7 +186,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// Creates a new instance of the shader specialization state. /// /// Current compute engine state - public ShaderSpecializationState(GpuChannelComputeState state) : this() + public ShaderSpecializationState(ref GpuChannelComputeState state) : this() { ComputeState = state; _compute = true; @@ -182,7 +197,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// Current 3D engine state /// Optional transform feedback buffers in use, if any - public ShaderSpecializationState(GpuChannelGraphicsState state, TransformFeedbackDescriptor[] descriptors) : this() + private ShaderSpecializationState(ref GpuChannelGraphicsState state, TransformFeedbackDescriptor[] descriptors) : this() { GraphicsState = state; _compute = false; @@ -194,6 +209,76 @@ namespace Ryujinx.Graphics.Gpu.Shader } } + /// + /// Prepare the shader specialization state for quick binding lookups. 
+ /// + /// The shader stages + public void Prepare(CachedShaderStage[] stages) + { + _allTextures = _textureSpecialization.ToArray(); + + _textureByBinding = new Box[stages.Length][]; + _imageByBinding = new Box[stages.Length][]; + + for (int i = 0; i < stages.Length; i++) + { + CachedShaderStage stage = stages[i]; + if (stage?.Info != null) + { + var textures = stage.Info.Textures; + var images = stage.Info.Images; + + var texBindings = new Box[textures.Count]; + var imageBindings = new Box[images.Count]; + + int stageIndex = Math.Max(i - 1, 0); // Don't count VertexA for looking up spec state. No-Op for compute. + + for (int j = 0; j < textures.Count; j++) + { + var texture = textures[j]; + texBindings[j] = GetTextureSpecState(stageIndex, texture.HandleIndex, texture.CbufSlot); + } + + for (int j = 0; j < images.Count; j++) + { + var image = images[j]; + imageBindings[j] = GetTextureSpecState(stageIndex, image.HandleIndex, image.CbufSlot); + } + + _textureByBinding[i] = texBindings; + _imageByBinding[i] = imageBindings; + } + } + } + + /// + /// Creates a new instance of the shader specialization state. + /// + /// Current 3D engine state + /// Current program pipeline state + /// Optional transform feedback buffers in use, if any + public ShaderSpecializationState( + ref GpuChannelGraphicsState state, + ref ProgramPipelineState pipelineState, + TransformFeedbackDescriptor[] descriptors) : this(ref state, descriptors) + { + PipelineState = pipelineState; + } + + /// + /// Creates a new instance of the shader specialization state. + /// + /// Current 3D engine state + /// Current program pipeline state + /// Optional transform feedback buffers in use, if any + public ShaderSpecializationState( + ref GpuChannelGraphicsState state, + ProgramPipelineState? pipelineState, + TransformFeedbackDescriptor[] descriptors) : this(ref state, descriptors) + { + PipelineState = pipelineState; + } + /// /// Indicates that the shader accesses the early Z force state. 
/// @@ -396,15 +481,38 @@ namespace Ryujinx.Graphics.Gpu.Shader /// GPU channel /// Texture pool state /// Graphics state + /// Indicates whether texture descriptors should be checked /// True if the state matches, false otherwise - public bool MatchesGraphics(GpuChannel channel, GpuChannelPoolState poolState, GpuChannelGraphicsState graphicsState) + public bool MatchesGraphics(GpuChannel channel, GpuChannelPoolState poolState, GpuChannelGraphicsState graphicsState, bool checkTextures) { if (graphicsState.ViewportTransformDisable != GraphicsState.ViewportTransformDisable) { return false; } - return Matches(channel, poolState, isCompute: false); + if (graphicsState.DepthMode != GraphicsState.DepthMode) + { + return false; + } + + if (graphicsState.AlphaTestEnable != GraphicsState.AlphaTestEnable) + { + return false; + } + + if (graphicsState.AlphaTestEnable && + (graphicsState.AlphaTestCompare != GraphicsState.AlphaTestCompare || + graphicsState.AlphaTestReference != GraphicsState.AlphaTestReference)) + { + return false; + } + + if (!graphicsState.AttributeTypes.ToSpan().SequenceEqual(GraphicsState.AttributeTypes.ToSpan())) + { + return false; + } + + return Matches(channel, poolState, checkTextures, isCompute: false); } /// @@ -412,10 +520,64 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// GPU channel /// Texture pool state + /// Indicates whether texture descriptors should be checked /// True if the state matches, false otherwise - public bool MatchesCompute(GpuChannel channel, GpuChannelPoolState poolState) + public bool MatchesCompute(GpuChannel channel, GpuChannelPoolState poolState, bool checkTextures) { - return Matches(channel, poolState, isCompute: true); + return Matches(channel, poolState, checkTextures, isCompute: true); + } + + /// + /// Fetch the constant buffers used for a texture to cache. 
+ /// + /// GPU channel + /// Indicates whenever the check is requested by the 3D or compute engine + /// The currently cached texture buffer index + /// The currently cached sampler buffer index + /// The currently cached texture buffer data + /// The currently cached sampler buffer data + /// The currently cached stage + /// The new texture buffer index + /// The new sampler buffer index + /// Stage index of the constant buffer + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void UpdateCachedBuffer( + GpuChannel channel, + bool isCompute, + ref int cachedTextureBufferIndex, + ref int cachedSamplerBufferIndex, + ref ReadOnlySpan cachedTextureBuffer, + ref ReadOnlySpan cachedSamplerBuffer, + ref int cachedStageIndex, + int textureBufferIndex, + int samplerBufferIndex, + int stageIndex) + { + bool stageChange = stageIndex != cachedStageIndex; + + if (stageChange || textureBufferIndex != cachedTextureBufferIndex) + { + ref BufferBounds bounds = ref channel.BufferManager.GetUniformBufferBounds(isCompute, stageIndex, textureBufferIndex); + + cachedTextureBuffer = MemoryMarshal.Cast(channel.MemoryManager.Physical.GetSpan(bounds.Address, (int)bounds.Size)); + cachedTextureBufferIndex = textureBufferIndex; + + if (samplerBufferIndex == textureBufferIndex) + { + cachedSamplerBuffer = cachedTextureBuffer; + cachedSamplerBufferIndex = samplerBufferIndex; + } + } + + if (stageChange || samplerBufferIndex != cachedSamplerBufferIndex) + { + ref BufferBounds bounds = ref channel.BufferManager.GetUniformBufferBounds(isCompute, stageIndex, samplerBufferIndex); + + cachedSamplerBuffer = MemoryMarshal.Cast(channel.MemoryManager.Physical.GetSpan(bounds.Address, (int)bounds.Size)); + cachedSamplerBufferIndex = samplerBufferIndex; + } + + cachedStageIndex = stageIndex; } /// @@ -423,9 +585,10 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// GPU channel /// Texture pool state + /// Indicates whether texture descriptors should be checked /// Indicates whenever the 
check is requested by the 3D or compute engine /// True if the state matches, false otherwise - private bool Matches(GpuChannel channel, GpuChannelPoolState poolState, bool isCompute) + private bool Matches(GpuChannel channel, GpuChannelPoolState poolState, bool checkTextures, bool isCompute) { int constantBufferUsePerStageMask = _constantBufferUsePerStage; @@ -445,55 +608,60 @@ namespace Ryujinx.Graphics.Gpu.Shader constantBufferUsePerStageMask &= ~(1 << index); } - foreach (var kv in _textureSpecialization) + if (checkTextures) { - TextureKey textureKey = kv.Key; + TexturePool pool = channel.TextureManager.GetTexturePool(poolState.TexturePoolGpuVa, poolState.TexturePoolMaximumId); - (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(textureKey.CbufSlot, poolState.TextureBufferIndex); + int cachedTextureBufferIndex = -1; + int cachedSamplerBufferIndex = -1; + int cachedStageIndex = -1; + ReadOnlySpan cachedTextureBuffer = Span.Empty; + ReadOnlySpan cachedSamplerBuffer = Span.Empty; - ulong textureCbAddress; - ulong samplerCbAddress; - - if (isCompute) + foreach (var kv in _allTextures) { - textureCbAddress = channel.BufferManager.GetComputeUniformBufferAddress(textureBufferIndex); - samplerCbAddress = channel.BufferManager.GetComputeUniformBufferAddress(samplerBufferIndex); - } - else - { - textureCbAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(textureKey.StageIndex, textureBufferIndex); - samplerCbAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(textureKey.StageIndex, samplerBufferIndex); - } + TextureKey textureKey = kv.Key; - if (!channel.MemoryManager.Physical.IsMapped(textureCbAddress) || !channel.MemoryManager.Physical.IsMapped(samplerCbAddress)) - { - continue; + (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(textureKey.CbufSlot, poolState.TextureBufferIndex); + + UpdateCachedBuffer(channel, + isCompute, + ref cachedTextureBufferIndex, + ref cachedSamplerBufferIndex, + 
ref cachedTextureBuffer, + ref cachedSamplerBuffer, + ref cachedStageIndex, + textureBufferIndex, + samplerBufferIndex, + textureKey.StageIndex); + + int packedId = TextureHandle.ReadPackedId(textureKey.Handle, cachedTextureBuffer, cachedSamplerBuffer); + + int textureId = TextureHandle.UnpackTextureId(packedId); + + ref readonly Image.TextureDescriptor descriptor = ref pool.GetDescriptorRef(textureId); + + if (!MatchesTexture(kv.Value, descriptor)) + { + return false; + } } + } - Image.TextureDescriptor descriptor; - - if (isCompute) - { - descriptor = channel.TextureManager.GetComputeTextureDescriptor( - poolState.TexturePoolGpuVa, - poolState.TextureBufferIndex, - poolState.TexturePoolMaximumId, - textureKey.Handle, - textureKey.CbufSlot); - } - else - { - descriptor = channel.TextureManager.GetGraphicsTextureDescriptor( - poolState.TexturePoolGpuVa, - poolState.TextureBufferIndex, - poolState.TexturePoolMaximumId, - textureKey.StageIndex, - textureKey.Handle, - textureKey.CbufSlot); - } - - Box specializationState = kv.Value; + return true; + } + /// + /// Checks if the recorded texture state matches the given texture descriptor. + /// + /// Texture specialization state + /// Texture descriptor + /// True if the state matches, false otherwise + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private bool MatchesTexture(Box specializationState, in Image.TextureDescriptor descriptor) + { + if (specializationState != null) + { if (specializationState.Value.QueriedFlags.HasFlag(QueriedTextureStateFlags.CoordNormalized) && specializationState.Value.CoordNormalized != descriptor.UnpackTextureCoordNormalized()) { @@ -504,6 +672,34 @@ namespace Ryujinx.Graphics.Gpu.Shader return true; } + /// + /// Checks if the recorded texture state for a given texture binding matches a texture descriptor. 
+ /// + /// The shader stage + /// The texture index + /// Texture descriptor + /// True if the state matches, false otherwise + public bool MatchesTexture(ShaderStage stage, int index, in Image.TextureDescriptor descriptor) + { + Box specializationState = _textureByBinding[(int)stage][index]; + + return MatchesTexture(specializationState, descriptor); + } + + /// + /// Checks if the recorded texture state for a given image binding matches a texture descriptor. + /// + /// The shader stage + /// The texture index + /// Texture descriptor + /// True if the state matches, false otherwise + public bool MatchesImage(ShaderStage stage, int index, in Image.TextureDescriptor descriptor) + { + Box specializationState = _imageByBinding[(int)stage][index]; + + return MatchesTexture(specializationState, descriptor); + } + /// /// Reads shader specialization state that has been serialized. /// @@ -536,6 +732,17 @@ namespace Ryujinx.Graphics.Gpu.Shader constantBufferUsePerStageMask &= ~(1 << index); } + bool hasPipelineState = false; + + dataReader.Read(ref hasPipelineState); + + if (hasPipelineState) + { + ProgramPipelineState pipelineState = default; + dataReader.ReadWithMagicAndSize(ref pipelineState, PgpsMagic); + specState.PipelineState = pipelineState; + } + if (specState._queriedState.HasFlag(QueriedStateFlags.TransformFeedback)) { ushort tfCount = 0; @@ -594,6 +801,16 @@ namespace Ryujinx.Graphics.Gpu.Shader constantBufferUsePerStageMask &= ~(1 << index); } + bool hasPipelineState = PipelineState.HasValue; + + dataWriter.Write(ref hasPipelineState); + + if (hasPipelineState) + { + ProgramPipelineState pipelineState = PipelineState.Value; + dataWriter.WriteWithMagicAndSize(ref pipelineState, PgpsMagic); + } + if (_queriedState.HasFlag(QueriedStateFlags.TransformFeedback)) { ushort tfCount = (ushort)TransformFeedbackDescriptors.Length; diff --git a/Ryujinx.Graphics.OpenGL/BackgroundContextWorker.cs b/Ryujinx.Graphics.OpenGL/BackgroundContextWorker.cs index 
3f1c055bf..764ea7159 100644 --- a/Ryujinx.Graphics.OpenGL/BackgroundContextWorker.cs +++ b/Ryujinx.Graphics.OpenGL/BackgroundContextWorker.cs @@ -1,4 +1,4 @@ -using Ryujinx.Common; +using Ryujinx.Common; using System; using System.Collections.Generic; using System.Threading; diff --git a/Ryujinx.Graphics.OpenGL/FormatTable.cs b/Ryujinx.Graphics.OpenGL/FormatTable.cs index 1a739b5ce..ea710b42c 100644 --- a/Ryujinx.Graphics.OpenGL/FormatTable.cs +++ b/Ryujinx.Graphics.OpenGL/FormatTable.cs @@ -88,16 +88,14 @@ namespace Ryujinx.Graphics.OpenGL Add(Format.Bc3Srgb, new FormatInfo(4, false, false, All.CompressedSrgbAlphaS3tcDxt5Ext)); Add(Format.Bc4Unorm, new FormatInfo(1, true, false, All.CompressedRedRgtc1)); Add(Format.Bc4Snorm, new FormatInfo(1, true, false, All.CompressedSignedRedRgtc1)); - Add(Format.Bc5Unorm, new FormatInfo(2, true, false, All.CompressedRgRgtc2)); - Add(Format.Bc5Snorm, new FormatInfo(2, true, false, All.CompressedSignedRgRgtc2)); - Add(Format.Bc7Unorm, new FormatInfo(4, true, false, All.CompressedRgbaBptcUnorm)); - Add(Format.Bc7Srgb, new FormatInfo(4, false, false, All.CompressedSrgbAlphaBptcUnorm)); - Add(Format.Bc6HSfloat, new FormatInfo(4, false, false, All.CompressedRgbBptcSignedFloat)); - Add(Format.Bc6HUfloat, new FormatInfo(4, false, false, All.CompressedRgbBptcUnsignedFloat)); - Add(Format.Etc2RgbUnorm, new FormatInfo(3, false, false, All.CompressedRgb8Etc2)); - Add(Format.Etc2RgbaUnorm, new FormatInfo(4, false, false, All.CompressedRgba8Etc2Eac)); - Add(Format.Etc2RgbSrgb, new FormatInfo(3, false, false, All.CompressedSrgb8Etc2)); - Add(Format.Etc2RgbaSrgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Etc2Eac)); + Add(Format.Bc5Unorm, new FormatInfo(1, true, false, All.CompressedRgRgtc2)); + Add(Format.Bc5Snorm, new FormatInfo(1, true, false, All.CompressedSignedRgRgtc2)); + Add(Format.Bc7Unorm, new FormatInfo(1, true, false, All.CompressedRgbaBptcUnorm)); + Add(Format.Bc7Srgb, new FormatInfo(1, false, false, 
All.CompressedSrgbAlphaBptcUnorm)); + Add(Format.Bc6HSfloat, new FormatInfo(1, false, false, All.CompressedRgbBptcSignedFloat)); + Add(Format.Bc6HUfloat, new FormatInfo(1, false, false, All.CompressedRgbBptcUnsignedFloat)); + Add(Format.Etc2RgbaUnorm, new FormatInfo(1, false, false, All.CompressedRgba8Etc2Eac)); + Add(Format.Etc2RgbaSrgb, new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Etc2Eac)); Add(Format.R8Uscaled, new FormatInfo(1, false, true, All.R8ui, PixelFormat.RedInteger, PixelType.UnsignedByte)); Add(Format.R8Sscaled, new FormatInfo(1, false, true, All.R8i, PixelFormat.RedInteger, PixelType.Byte)); Add(Format.R16Uscaled, new FormatInfo(1, false, true, All.R16ui, PixelFormat.RedInteger, PixelType.UnsignedShort)); @@ -138,34 +136,34 @@ namespace Ryujinx.Graphics.OpenGL Add(Format.R32G32B32X32Float, new FormatInfo(4, false, false, All.Rgb32f, PixelFormat.Rgba, PixelType.Float)); Add(Format.R32G32B32X32Uint, new FormatInfo(4, false, false, All.Rgb32ui, PixelFormat.RgbaInteger, PixelType.UnsignedInt)); Add(Format.R32G32B32X32Sint, new FormatInfo(4, false, false, All.Rgb32i, PixelFormat.RgbaInteger, PixelType.Int)); - Add(Format.Astc4x4Unorm, new FormatInfo(4, true, false, All.CompressedRgbaAstc4X4Khr)); - Add(Format.Astc5x4Unorm, new FormatInfo(4, true, false, All.CompressedRgbaAstc5X4Khr)); - Add(Format.Astc5x5Unorm, new FormatInfo(4, true, false, All.CompressedRgbaAstc5X5Khr)); - Add(Format.Astc6x5Unorm, new FormatInfo(4, true, false, All.CompressedRgbaAstc6X5Khr)); - Add(Format.Astc6x6Unorm, new FormatInfo(4, true, false, All.CompressedRgbaAstc6X6Khr)); - Add(Format.Astc8x5Unorm, new FormatInfo(4, true, false, All.CompressedRgbaAstc8X5Khr)); - Add(Format.Astc8x6Unorm, new FormatInfo(4, true, false, All.CompressedRgbaAstc8X6Khr)); - Add(Format.Astc8x8Unorm, new FormatInfo(4, true, false, All.CompressedRgbaAstc8X8Khr)); - Add(Format.Astc10x5Unorm, new FormatInfo(4, true, false, All.CompressedRgbaAstc10X5Khr)); - Add(Format.Astc10x6Unorm, new 
FormatInfo(4, true, false, All.CompressedRgbaAstc10X6Khr)); - Add(Format.Astc10x8Unorm, new FormatInfo(4, true, false, All.CompressedRgbaAstc10X8Khr)); - Add(Format.Astc10x10Unorm, new FormatInfo(4, true, false, All.CompressedRgbaAstc10X10Khr)); - Add(Format.Astc12x10Unorm, new FormatInfo(4, true, false, All.CompressedRgbaAstc12X10Khr)); - Add(Format.Astc12x12Unorm, new FormatInfo(4, true, false, All.CompressedRgbaAstc12X12Khr)); - Add(Format.Astc4x4Srgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Astc4X4Khr)); - Add(Format.Astc5x4Srgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Astc5X4Khr)); - Add(Format.Astc5x5Srgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Astc5X5Khr)); - Add(Format.Astc6x5Srgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Astc6X5Khr)); - Add(Format.Astc6x6Srgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Astc6X6Khr)); - Add(Format.Astc8x5Srgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Astc8X5Khr)); - Add(Format.Astc8x6Srgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Astc8X6Khr)); - Add(Format.Astc8x8Srgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Astc8X8Khr)); - Add(Format.Astc10x5Srgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Astc10X5Khr)); - Add(Format.Astc10x6Srgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Astc10X6Khr)); - Add(Format.Astc10x8Srgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Astc10X8Khr)); - Add(Format.Astc10x10Srgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Astc10X10Khr)); - Add(Format.Astc12x10Srgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Astc12X10Khr)); - Add(Format.Astc12x12Srgb, new FormatInfo(4, false, false, All.CompressedSrgb8Alpha8Astc12X12Khr)); + Add(Format.Astc4x4Unorm, new FormatInfo(1, true, false, All.CompressedRgbaAstc4X4Khr)); + Add(Format.Astc5x4Unorm, new FormatInfo(1, true, false, All.CompressedRgbaAstc5X4Khr)); + 
Add(Format.Astc5x5Unorm, new FormatInfo(1, true, false, All.CompressedRgbaAstc5X5Khr)); + Add(Format.Astc6x5Unorm, new FormatInfo(1, true, false, All.CompressedRgbaAstc6X5Khr)); + Add(Format.Astc6x6Unorm, new FormatInfo(1, true, false, All.CompressedRgbaAstc6X6Khr)); + Add(Format.Astc8x5Unorm, new FormatInfo(1, true, false, All.CompressedRgbaAstc8X5Khr)); + Add(Format.Astc8x6Unorm, new FormatInfo(1, true, false, All.CompressedRgbaAstc8X6Khr)); + Add(Format.Astc8x8Unorm, new FormatInfo(1, true, false, All.CompressedRgbaAstc8X8Khr)); + Add(Format.Astc10x5Unorm, new FormatInfo(1, true, false, All.CompressedRgbaAstc10X5Khr)); + Add(Format.Astc10x6Unorm, new FormatInfo(1, true, false, All.CompressedRgbaAstc10X6Khr)); + Add(Format.Astc10x8Unorm, new FormatInfo(1, true, false, All.CompressedRgbaAstc10X8Khr)); + Add(Format.Astc10x10Unorm, new FormatInfo(1, true, false, All.CompressedRgbaAstc10X10Khr)); + Add(Format.Astc12x10Unorm, new FormatInfo(1, true, false, All.CompressedRgbaAstc12X10Khr)); + Add(Format.Astc12x12Unorm, new FormatInfo(1, true, false, All.CompressedRgbaAstc12X12Khr)); + Add(Format.Astc4x4Srgb, new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Astc4X4Khr)); + Add(Format.Astc5x4Srgb, new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Astc5X4Khr)); + Add(Format.Astc5x5Srgb, new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Astc5X5Khr)); + Add(Format.Astc6x5Srgb, new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Astc6X5Khr)); + Add(Format.Astc6x6Srgb, new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Astc6X6Khr)); + Add(Format.Astc8x5Srgb, new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Astc8X5Khr)); + Add(Format.Astc8x6Srgb, new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Astc8X6Khr)); + Add(Format.Astc8x8Srgb, new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Astc8X8Khr)); + Add(Format.Astc10x5Srgb, new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Astc10X5Khr)); + Add(Format.Astc10x6Srgb, 
new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Astc10X6Khr)); + Add(Format.Astc10x8Srgb, new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Astc10X8Khr)); + Add(Format.Astc10x10Srgb, new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Astc10X10Khr)); + Add(Format.Astc12x10Srgb, new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Astc12X10Khr)); + Add(Format.Astc12x12Srgb, new FormatInfo(1, false, false, All.CompressedSrgb8Alpha8Astc12X12Khr)); Add(Format.B5G6R5Unorm, new FormatInfo(3, true, false, All.Rgb565, PixelFormat.Rgb, PixelType.UnsignedShort565Reversed)); Add(Format.B5G5R5X1Unorm, new FormatInfo(4, true, false, All.Rgb5, PixelFormat.Rgba, PixelType.UnsignedShort1555Reversed)); Add(Format.B5G5R5A1Unorm, new FormatInfo(4, true, false, All.Rgb5A1, PixelFormat.Rgba, PixelType.UnsignedShort1555Reversed)); diff --git a/Ryujinx.Graphics.OpenGL/Framebuffer.cs b/Ryujinx.Graphics.OpenGL/Framebuffer.cs index da928b4c8..dafa76723 100644 --- a/Ryujinx.Graphics.OpenGL/Framebuffer.cs +++ b/Ryujinx.Graphics.OpenGL/Framebuffer.cs @@ -9,10 +9,13 @@ namespace Ryujinx.Graphics.OpenGL class Framebuffer : IDisposable { public int Handle { get; private set; } + private int _clearFbHandle; + private bool _clearFbInitialized; private FramebufferAttachment _lastDsAttachment; private readonly TextureView[] _colors; + private TextureView _depthStencil; private int _colorsCount; private bool _dualSourceBlend; @@ -20,6 +23,7 @@ namespace Ryujinx.Graphics.OpenGL public Framebuffer() { Handle = GL.GenFramebuffer(); + _clearFbHandle = GL.GenFramebuffer(); _colors = new TextureView[8]; } @@ -55,20 +59,7 @@ namespace Ryujinx.Graphics.OpenGL if (depthStencil != null) { - FramebufferAttachment attachment; - - if (IsPackedDepthStencilFormat(depthStencil.Format)) - { - attachment = FramebufferAttachment.DepthStencilAttachment; - } - else if (IsDepthOnlyFormat(depthStencil.Format)) - { - attachment = FramebufferAttachment.DepthAttachment; - } - else - { - attachment = 
FramebufferAttachment.StencilAttachment; - } + FramebufferAttachment attachment = GetAttachment(depthStencil.Format); GL.FramebufferTexture( FramebufferTarget.Framebuffer, @@ -82,6 +73,8 @@ namespace Ryujinx.Graphics.OpenGL { _lastDsAttachment = 0; } + + _depthStencil = depthStencil; } public void SetDualSourceBlend(bool enable) @@ -124,6 +117,22 @@ namespace Ryujinx.Graphics.OpenGL GL.DrawBuffers(colorsCount, drawBuffers); } + private static FramebufferAttachment GetAttachment(Format format) + { + if (IsPackedDepthStencilFormat(format)) + { + return FramebufferAttachment.DepthStencilAttachment; + } + else if (IsDepthOnlyFormat(format)) + { + return FramebufferAttachment.DepthAttachment; + } + else + { + return FramebufferAttachment.StencilAttachment; + } + } + private static bool IsPackedDepthStencilFormat(Format format) { return format == Format.D24UnormS8Uint || @@ -136,6 +145,78 @@ namespace Ryujinx.Graphics.OpenGL return format == Format.D16Unorm || format == Format.D32Float; } + public void AttachColorLayerForClear(int index, int layer) + { + TextureView color = _colors[index]; + + if (!IsLayered(color)) + { + return; + } + + BindClearFb(); + GL.FramebufferTextureLayer(FramebufferTarget.Framebuffer, FramebufferAttachment.ColorAttachment0 + index, color.Handle, 0, layer); + } + + public void DetachColorLayerForClear(int index) + { + TextureView color = _colors[index]; + + if (!IsLayered(color)) + { + return; + } + + GL.FramebufferTexture(FramebufferTarget.Framebuffer, FramebufferAttachment.ColorAttachment0 + index, 0, 0); + Bind(); + } + + public void AttachDepthStencilLayerForClear(int layer) + { + TextureView depthStencil = _depthStencil; + + if (!IsLayered(depthStencil)) + { + return; + } + + BindClearFb(); + GL.FramebufferTextureLayer(FramebufferTarget.Framebuffer, GetAttachment(depthStencil.Format), depthStencil.Handle, 0, layer); + } + + public void DetachDepthStencilLayerForClear() + { + TextureView depthStencil = _depthStencil; + + if 
(!IsLayered(depthStencil)) + { + return; + } + + GL.FramebufferTexture(FramebufferTarget.Framebuffer, GetAttachment(depthStencil.Format), 0, 0); + Bind(); + } + + private void BindClearFb() + { + GL.BindFramebuffer(FramebufferTarget.Framebuffer, _clearFbHandle); + + if (!_clearFbInitialized) + { + SetDrawBuffersImpl(Constants.MaxRenderTargets); + _clearFbInitialized = true; + } + } + + private static bool IsLayered(TextureView view) + { + return view != null && + view.Target != Target.Texture1D && + view.Target != Target.Texture2D && + view.Target != Target.Texture2DMultisample && + view.Target != Target.TextureBuffer; + } + public void Dispose() { if (Handle != 0) @@ -144,6 +225,13 @@ namespace Ryujinx.Graphics.OpenGL Handle = 0; } + + if (_clearFbHandle != 0) + { + GL.DeleteFramebuffer(_clearFbHandle); + + _clearFbHandle = 0; + } } } } diff --git a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs index 773c9f634..1e36b1cf9 100644 --- a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs +++ b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs @@ -9,6 +9,7 @@ namespace Ryujinx.Graphics.OpenGL private static readonly Lazy _supportsDrawTexture = new Lazy(() => HasExtension("GL_NV_draw_texture")); private static readonly Lazy _supportsFragmentShaderInterlock = new Lazy(() => HasExtension("GL_ARB_fragment_shader_interlock")); private static readonly Lazy _supportsFragmentShaderOrdering = new Lazy(() => HasExtension("GL_INTEL_fragment_shader_ordering")); + private static readonly Lazy _supportsGeometryShaderPassthrough = new Lazy(() => HasExtension("GL_NV_geometry_shader_passthrough")); private static readonly Lazy _supportsImageLoadFormatted = new Lazy(() => HasExtension("GL_EXT_shader_image_load_formatted")); private static readonly Lazy _supportsIndirectParameters = new Lazy(() => HasExtension("GL_ARB_indirect_parameters")); private static readonly Lazy _supportsParallelShaderCompile = new Lazy(() => HasExtension("GL_ARB_parallel_shader_compile")); 
@@ -16,6 +17,9 @@ namespace Ryujinx.Graphics.OpenGL private static readonly Lazy _supportsQuads = new Lazy(SupportsQuadsCheck); private static readonly Lazy _supportsSeamlessCubemapPerTexture = new Lazy(() => HasExtension("GL_ARB_seamless_cubemap_per_texture")); private static readonly Lazy _supportsShaderBallot = new Lazy(() => HasExtension("GL_ARB_shader_ballot")); + private static readonly Lazy _supportsTextureCompressionBptc = new Lazy(() => HasExtension("GL_EXT_texture_compression_bptc")); + private static readonly Lazy _supportsTextureCompressionRgtc = new Lazy(() => HasExtension("GL_EXT_texture_compression_rgtc")); + private static readonly Lazy _supportsTextureCompressionS3tc = new Lazy(() => HasExtension("GL_EXT_texture_compression_s3tc")); private static readonly Lazy _supportsTextureShadowLod = new Lazy(() => HasExtension("GL_EXT_texture_shadow_lod")); private static readonly Lazy _supportsViewportSwizzle = new Lazy(() => HasExtension("GL_NV_viewport_swizzle")); @@ -47,6 +51,7 @@ namespace Ryujinx.Graphics.OpenGL public static bool SupportsDrawTexture => _supportsDrawTexture.Value; public static bool SupportsFragmentShaderInterlock => _supportsFragmentShaderInterlock.Value; public static bool SupportsFragmentShaderOrdering => _supportsFragmentShaderOrdering.Value; + public static bool SupportsGeometryShaderPassthrough => _supportsGeometryShaderPassthrough.Value; public static bool SupportsImageLoadFormatted => _supportsImageLoadFormatted.Value; public static bool SupportsIndirectParameters => _supportsIndirectParameters.Value; public static bool SupportsParallelShaderCompile => _supportsParallelShaderCompile.Value; @@ -54,6 +59,9 @@ namespace Ryujinx.Graphics.OpenGL public static bool SupportsQuads => _supportsQuads.Value; public static bool SupportsSeamlessCubemapPerTexture => _supportsSeamlessCubemapPerTexture.Value; public static bool SupportsShaderBallot => _supportsShaderBallot.Value; + public static bool SupportsTextureCompressionBptc => 
_supportsTextureCompressionBptc.Value; + public static bool SupportsTextureCompressionRgtc => _supportsTextureCompressionRgtc.Value; + public static bool SupportsTextureCompressionS3tc => _supportsTextureCompressionS3tc.Value; public static bool SupportsTextureShadowLod => _supportsTextureShadowLod.Value; public static bool SupportsViewportSwizzle => _supportsViewportSwizzle.Value; diff --git a/Ryujinx.Graphics.OpenGL/Pipeline.cs b/Ryujinx.Graphics.OpenGL/Pipeline.cs index ced41d48f..3ce4c141b 100644 --- a/Ryujinx.Graphics.OpenGL/Pipeline.cs +++ b/Ryujinx.Graphics.OpenGL/Pipeline.cs @@ -43,7 +43,7 @@ namespace Ryujinx.Graphics.OpenGL private CounterQueueEvent _activeConditionalRender; private Vector4[] _fpIsBgra = new Vector4[SupportBuffer.FragmentIsBgraCount]; - private Vector4[] _renderScale = new Vector4[65]; + private Vector4[] _renderScale = new Vector4[73]; private int _fragmentScaleCount; private TextureBase _unit0Texture; @@ -110,7 +110,7 @@ namespace Ryujinx.Graphics.OpenGL Buffer.Clear(destination, offset, size, value); } - public void ClearRenderTargetColor(int index, uint componentMask, ColorF color) + public void ClearRenderTargetColor(int index, int layer, uint componentMask, ColorF color) { GL.ColorMask( index, @@ -119,14 +119,18 @@ namespace Ryujinx.Graphics.OpenGL (componentMask & 4) != 0, (componentMask & 8) != 0); + _framebuffer.AttachColorLayerForClear(index, layer); + float[] colors = new float[] { color.Red, color.Green, color.Blue, color.Alpha }; GL.ClearBuffer(OpenTK.Graphics.OpenGL.ClearBuffer.Color, index, colors); + _framebuffer.DetachColorLayerForClear(index); + RestoreComponentMask(index); } - public void ClearRenderTargetDepthStencil(float depthValue, bool depthMask, int stencilValue, int stencilMask) + public void ClearRenderTargetDepthStencil(int layer, float depthValue, bool depthMask, int stencilValue, int stencilMask) { bool stencilMaskChanged = stencilMask != 0 && @@ -144,6 +148,8 @@ namespace Ryujinx.Graphics.OpenGL 
GL.DepthMask(depthMask); } + _framebuffer.AttachDepthStencilLayerForClear(layer); + if (depthMask && stencilMask != 0) { GL.ClearBuffer(ClearBufferCombined.DepthStencil, 0, depthValue, stencilValue); @@ -157,6 +163,8 @@ namespace Ryujinx.Graphics.OpenGL GL.ClearBuffer(OpenTK.Graphics.OpenGL.ClearBuffer.Stencil, 0, ref stencilValue); } + _framebuffer.DetachDepthStencilLayerForClear(); + if (stencilMaskChanged) { GL.StencilMaskSeparate(StencilFace.Front, _stencilFrontMask); @@ -597,6 +605,8 @@ namespace Ryujinx.Graphics.OpenGL GL.EndTransformFeedback(); } + GL.ClipControl(ClipOrigin.UpperLeft, ClipDepthMode.NegativeOneToOne); + _drawTexture.Draw( view, samp, @@ -627,6 +637,8 @@ namespace Ryujinx.Graphics.OpenGL { GL.BeginTransformFeedback(_tfTopology); } + + RestoreClipControl(); } } } @@ -1094,45 +1106,45 @@ namespace Ryujinx.Graphics.OpenGL _framebuffer.SetDrawBuffers(colors.Length); } - public void SetSampler(int binding, ISampler sampler) + public unsafe void SetScissors(ReadOnlySpan> regions) { - if (sampler == null) + int count = Math.Min(regions.Length, Constants.MaxViewports); + + int* v = stackalloc int[count * 4]; + + for (int index = 0; index < count; index++) { - return; - } + int vIndex = index * 4; - Sampler samp = (Sampler)sampler; + var region = regions[index]; - if (binding == 0) - { - _unit0Sampler = samp; - } + bool enabled = (region.X | region.Y) != 0 || region.Width != 0xffff || region.Height != 0xffff; + uint mask = 1u << index; - samp.Bind(binding); - } - - public void SetScissor(int index, bool enable, int x, int y, int width, int height) - { - uint mask = 1u << index; - - if (!enable) - { - if ((_scissorEnables & mask) != 0) + if (enabled) { - _scissorEnables &= ~mask; - GL.Disable(IndexedEnableCap.ScissorTest, index); + v[vIndex] = region.X; + v[vIndex + 1] = region.Y; + v[vIndex + 2] = region.Width; + v[vIndex + 3] = region.Height; + + if ((_scissorEnables & mask) == 0) + { + _scissorEnables |= mask; + 
GL.Enable(IndexedEnableCap.ScissorTest, index); + } + } + else + { + if ((_scissorEnables & mask) != 0) + { + _scissorEnables &= ~mask; + GL.Disable(IndexedEnableCap.ScissorTest, index); + } } - - return; } - if ((_scissorEnables & mask) == 0) - { - _scissorEnables |= mask; - GL.Enable(IndexedEnableCap.ScissorTest, index); - } - - GL.ScissorIndexed(index, x, y, width, height); + GL.ScissorArray(0, count, v); } public void SetStencilTest(StencilTestDescriptor stencilTest) @@ -1183,23 +1195,31 @@ namespace Ryujinx.Graphics.OpenGL SetBuffers(first, buffers, isStorage: true); } - public void SetTexture(int binding, ITexture texture) + public void SetTextureAndSampler(ShaderStage stage, int binding, ITexture texture, ISampler sampler) { - if (texture == null) + if (texture != null) { - return; + if (binding == 0) + { + _unit0Texture = (TextureBase)texture; + } + else + { + ((TextureBase)texture).Bind(binding); + } } + Sampler glSampler = (Sampler)sampler; + + glSampler?.Bind(binding); + if (binding == 0) { - _unit0Texture = (TextureBase)texture; - } - else - { - ((TextureBase)texture).Bind(binding); + _unit0Sampler = glSampler; } } + public void SetTransformFeedbackBuffers(ReadOnlySpan buffers) { if (_tfEnabled) diff --git a/Ryujinx.Graphics.OpenGL/Queries/CounterQueueEvent.cs b/Ryujinx.Graphics.OpenGL/Queries/CounterQueueEvent.cs index 8b0ae30ea..81451389c 100644 --- a/Ryujinx.Graphics.OpenGL/Queries/CounterQueueEvent.cs +++ b/Ryujinx.Graphics.OpenGL/Queries/CounterQueueEvent.cs @@ -1,5 +1,4 @@ using OpenTK.Graphics.OpenGL; -using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; using System; using System.Threading; diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs index 2a9ab4223..67d685f6c 100644 --- a/Ryujinx.Graphics.OpenGL/Renderer.cs +++ b/Ryujinx.Graphics.OpenGL/Renderer.cs @@ -87,6 +87,11 @@ namespace Ryujinx.Graphics.OpenGL Buffer.Delete(buffer); } + public HardwareInfo GetHardwareInfo() + { + return new 
HardwareInfo(GpuVendor, GpuRenderer); + } + public ReadOnlySpan GetBufferData(BufferHandle buffer, int offset, int size) { return Buffer.GetData(this, buffer, offset, size); @@ -100,11 +105,15 @@ namespace Ryujinx.Graphics.OpenGL hasFrontFacingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows, hasVectorIndexingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows, supportsAstcCompression: HwCapabilities.SupportsAstcCompression, + supportsBc123Compression: HwCapabilities.SupportsTextureCompressionS3tc, + supportsBc45Compression: HwCapabilities.SupportsTextureCompressionRgtc, + supportsBc67Compression: true, // Should check BPTC extension, but for some reason NVIDIA is not exposing the extension. supports3DTextureCompression: false, supportsBgraFormat: false, supportsR4G4Format: false, supportsFragmentShaderInterlock: HwCapabilities.SupportsFragmentShaderInterlock, supportsFragmentShaderOrderingIntel: HwCapabilities.SupportsFragmentShaderOrdering, + supportsGeometryShaderPassthrough: HwCapabilities.SupportsGeometryShaderPassthrough, supportsImageLoadFormatted: HwCapabilities.SupportsImageLoadFormatted, supportsMismatchingViewFormat: HwCapabilities.SupportsMismatchingViewFormat, supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset, @@ -112,6 +121,10 @@ namespace Ryujinx.Graphics.OpenGL supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod, supportsViewportSwizzle: HwCapabilities.SupportsViewportSwizzle, supportsIndirectParameters: HwCapabilities.SupportsIndirectParameters, + maximumUniformBuffersPerStage: 13, // TODO: Avoid hardcoding those limits here and get from driver? 
+ maximumStorageBuffersPerStage: 16, + maximumTexturesPerStage: 32, + maximumImagesPerStage: 8, maximumComputeSharedMemorySize: HwCapabilities.MaximumComputeSharedMemorySize, maximumSupportedAnisotropy: HwCapabilities.MaximumSupportedAnisotropy, storageBufferOffsetAlignment: HwCapabilities.StorageBufferOffsetAlignment); diff --git a/Ryujinx.Graphics.Shader/AlphaTestOp.cs b/Ryujinx.Graphics.Shader/AlphaTestOp.cs new file mode 100644 index 000000000..57c0d1314 --- /dev/null +++ b/Ryujinx.Graphics.Shader/AlphaTestOp.cs @@ -0,0 +1,14 @@ +namespace Ryujinx.Graphics.Shader +{ + public enum AlphaTestOp + { + Never = 1, + Less, + Equal, + LessOrEqual, + Greater, + NotEqual, + GreaterOrEqual, + Always + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/AttributeType.cs b/Ryujinx.Graphics.Shader/AttributeType.cs new file mode 100644 index 000000000..466f06cc8 --- /dev/null +++ b/Ryujinx.Graphics.Shader/AttributeType.cs @@ -0,0 +1,36 @@ +using System; + +namespace Ryujinx.Graphics.Shader +{ + public enum AttributeType : byte + { + Float, + Sint, + Uint + } + + static class AttributeTypeExtensions + { + public static string GetScalarType(this AttributeType type) + { + return type switch + { + AttributeType.Float => "float", + AttributeType.Sint => "int", + AttributeType.Uint => "uint", + _ => throw new ArgumentException($"Invalid attribute type \"{type}\".") + }; + } + + public static string GetVec4Type(this AttributeType type) + { + return type switch + { + AttributeType.Float => "vec4", + AttributeType.Sint => "ivec4", + AttributeType.Uint => "uvec4", + _ => throw new ArgumentException($"Invalid attribute type \"{type}\".") + }; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs index 825347497..418af6cb7 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs @@ -70,53 
+70,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl AppendLine("}" + suffix); } - public (TextureDescriptor, int) FindTextureDescriptor(AstTextureOperation texOp) - { - TextureDescriptor[] descriptors = Config.GetTextureDescriptors(); - - for (int i = 0; i < descriptors.Length; i++) - { - var descriptor = descriptors[i]; - - if (descriptor.CbufSlot == texOp.CbufSlot && - descriptor.HandleIndex == texOp.Handle && - descriptor.Format == texOp.Format) - { - return (descriptor, i); - } - } - - return (default, -1); - } - - private static int FindDescriptorIndex(TextureDescriptor[] array, AstTextureOperation texOp) - { - for (int i = 0; i < array.Length; i++) - { - var descriptor = array[i]; - - if (descriptor.Type == texOp.Type && - descriptor.CbufSlot == texOp.CbufSlot && - descriptor.HandleIndex == texOp.Handle && - descriptor.Format == texOp.Format) - { - return i; - } - } - - return -1; - } - - public int FindTextureDescriptorIndex(AstTextureOperation texOp) - { - return FindDescriptorIndex(Config.GetTextureDescriptors(), texOp); - } - - public int FindImageDescriptorIndex(AstTextureOperation texOp) - { - return FindDescriptorIndex(Config.GetImageDescriptors(), texOp); - } - public StructuredFunction GetFunction(int id) { return _info.Functions[id]; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs index 59a7ccdca..8be2a32c4 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs @@ -11,7 +11,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { public static void Declare(CodeGenContext context, StructuredProgramInfo info) { - context.AppendLine("#version 450 core"); + context.AppendLine(context.Config.Options.TargetApi == TargetApi.Vulkan ? 
"#version 460 core" : "#version 450 core"); context.AppendLine("#extension GL_ARB_gpu_shader_int64 : enable"); if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot()) @@ -43,8 +43,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl context.AppendLine("#extension GL_INTEL_fragment_shader_ordering : enable"); } } + else + { + context.AppendLine("#extension GL_ARB_shader_viewport_layer_array : enable"); + } - if (context.Config.GpPassthrough) + if (context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough()) { context.AppendLine("#extension GL_NV_geometry_shader_passthrough : enable"); } @@ -123,11 +127,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { if (context.Config.Stage == ShaderStage.Geometry) { - string inPrimitive = context.Config.GpuAccessor.QueryPrimitiveTopology().ToGlslString(); + InputTopology inputTopology = context.Config.GpuAccessor.QueryPrimitiveTopology(); + string inPrimitive = inputTopology.ToGlslString(); - context.AppendLine($"layout ({inPrimitive}) in;"); + context.AppendLine($"layout (invocations = {context.Config.ThreadsPerInputPrimitive}, {inPrimitive}) in;"); - if (context.Config.GpPassthrough) + if (context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough()) { context.AppendLine($"layout (passthrough) in gl_PerVertex"); context.EnterScope(); @@ -140,7 +145,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { string outPrimitive = context.Config.OutputTopology.ToGlslString(); - int maxOutputVertices = context.Config.MaxOutputVertices; + int maxOutputVertices = context.Config.GpPassthrough + ? 
inputTopology.ToInputVertices() + : context.Config.MaxOutputVertices; context.AppendLine($"layout ({outPrimitive}, max_vertices = {maxOutputVertices}) out;"); } @@ -196,12 +203,36 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl context.Config.Stage != ShaderStage.Fragment && context.Config.TransformFeedbackEnabled) { - var tfOutput = context.GetTransformFeedbackOutput(AttributeConsts.PositionX); - if (tfOutput.Valid) + var tfPosition = context.GetTransformFeedbackOutput(AttributeConsts.PositionX); + var tfPointSize = context.GetTransformFeedbackOutput(AttributeConsts.PointSize); + var tfClipDistance = context.GetTransformFeedbackOutput(AttributeConsts.ClipDistance0); + + if (tfPosition.Valid || tfPointSize.Valid || tfClipDistance.Valid) { - context.AppendLine($"layout (xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}) out gl_PerVertex"); + context.AppendLine("out gl_PerVertex"); context.EnterScope(); - context.AppendLine("vec4 gl_Position;"); + context.AppendLine($"{GetTfLayout(tfPosition)}vec4 gl_Position;"); + context.AppendLine($"{GetTfLayout(tfPointSize)}float gl_PointSize;"); + + if (tfClipDistance.Valid) + { + int clipDistanceCount = 1; + + for (; clipDistanceCount < 8; clipDistanceCount++) + { + if (!context.GetTransformFeedbackOutput(AttributeConsts.ClipDistance0 + clipDistanceCount).Valid) + { + break; + } + } + + context.AppendLine($"{GetTfLayout(tfClipDistance)}float gl_ClipDistance[{clipDistanceCount}];"); + } + else + { + context.AppendLine("float gl_ClipDistance[];"); + } + context.LeaveScope(context.Config.Stage == ShaderStage.TessellationControl ? 
" gl_out[];" : ";"); } } @@ -311,6 +342,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl } } + private static string GetTfLayout(TransformFeedbackOutput tfOutput) + { + if (tfOutput.Valid) + { + return $"layout (xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}) "; + } + + return string.Empty; + } + public static void DeclareLocals(CodeGenContext context, StructuredFunction function) { foreach (AstOperand decl in function.Locals) @@ -326,11 +367,11 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl switch (type) { case VariableType.Bool: return "bool"; - case VariableType.F32: return "precise float"; - case VariableType.F64: return "double"; + case VariableType.F32: return "precise float"; + case VariableType.F64: return "double"; case VariableType.None: return "void"; - case VariableType.S32: return "int"; - case VariableType.U32: return "uint"; + case VariableType.S32: return "int"; + case VariableType.U32: return "uint"; } throw new ArgumentException($"Invalid variable type \"{type}\"."); @@ -417,10 +458,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl if (context.Config.Options.TargetApi == TargetApi.Vulkan) { - bool isBuffer = (descriptor.Type & SamplerType.Mask) == SamplerType.TextureBuffer; - int setIndex = isBuffer ? 4 : 2; - - layout = $", set = {setIndex}"; + layout = ", set = 2"; } context.AppendLine($"layout (binding = {descriptor.Binding}{layout}) uniform {samplerTypeName} {samplerName};"); @@ -470,10 +508,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl if (context.Config.Options.TargetApi == TargetApi.Vulkan) { - bool isBuffer = (descriptor.Type & SamplerType.Mask) == SamplerType.TextureBuffer; - int setIndex = isBuffer ? 
5 : 3; - - layout = $", set = {setIndex}{layout}"; + layout = $", set = 3{layout}"; } context.AppendLine($"layout (binding = {descriptor.Binding}{layout}) uniform {imageTypeName} {imageName};"); @@ -525,29 +560,59 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl }; } - string pass = (context.Config.PassthroughAttributes & (1 << attr)) != 0 ? "passthrough, " : string.Empty; + bool passthrough = (context.Config.PassthroughAttributes & (1 << attr)) != 0; + string pass = passthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough() ? "passthrough, " : string.Empty; string name = $"{DefaultNames.IAttributePrefix}{attr}"; if (context.Config.TransformFeedbackEnabled && context.Config.Stage != ShaderStage.Vertex) { + string type; + + if (context.Config.Stage == ShaderStage.Vertex) + { + type = context.Config.GpuAccessor.QueryAttributeType(attr).GetScalarType(); + } + else + { + type = AttributeType.Float.GetScalarType(); + } + for (int c = 0; c < 4; c++) { char swzMask = "xyzw"[c]; - context.AppendLine($"layout ({pass}location = {attr}, component = {c}) {iq}in float {name}_{swzMask}{suffix};"); + context.AppendLine($"layout ({pass}location = {attr}, component = {c}) {iq}in {type} {name}_{swzMask}{suffix};"); } } else { - context.AppendLine($"layout ({pass}location = {attr}) {iq}in vec4 {name}{suffix};"); + string type; + + if (context.Config.Stage == ShaderStage.Vertex) + { + type = context.Config.GpuAccessor.QueryAttributeType(attr).GetVec4Type(); + } + else + { + type = AttributeType.Float.GetVec4Type(); + } + + context.AppendLine($"layout ({pass}location = {attr}) {iq}in {type} {name}{suffix};"); } } private static void DeclareInputAttributePerPatch(CodeGenContext context, int attr) { + string layout = string.Empty; + + if (context.Config.Options.TargetApi == TargetApi.Vulkan) + { + layout = $"layout (location = {32 + attr}) "; + } + string name = $"{DefaultNames.PerPatchAttributePrefix}{attr}"; - context.AppendLine($"patch in vec4 {name};"); + 
context.AppendLine($"{layout}patch in vec4 {name};"); } private static void DeclareOutputAttributes(CodeGenContext context, StructuredProgramInfo info) @@ -608,14 +673,21 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl private static void DeclareOutputAttributePerPatch(CodeGenContext context, int attr) { + string layout = string.Empty; + + if (context.Config.Options.TargetApi == TargetApi.Vulkan) + { + layout = $"layout (location = {32 + attr}) "; + } + string name = $"{DefaultNames.PerPatchAttributePrefix}{attr}"; - context.AppendLine($"patch out vec4 {name};"); + context.AppendLine($"{layout}patch out vec4 {name};"); } private static void DeclareSupportUniformBlock(CodeGenContext context, ShaderStage stage, int scaleElements) { - bool needsSupportBlock = stage == ShaderStage.Fragment || + bool needsSupportBlock = stage == ShaderStage.Fragment || (context.Config.LastInVertexPipeline && context.Config.GpuAccessor.QueryViewportTransformDisable()); if (!needsSupportBlock && scaleElements == 0) diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs index 3af120f88..e9dbdd2d3 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs @@ -127,7 +127,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl else if (node is AstAssignment assignment) { VariableType srcType = OperandManager.GetNodeDestType(context, assignment.Source); - VariableType dstType = OperandManager.GetNodeDestType(context, assignment.Destination); + VariableType dstType = OperandManager.GetNodeDestType(context, assignment.Destination, isAsgDest: true); string dest; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs index 69214a355..c40f96f11 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs +++ 
b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs @@ -7,11 +7,11 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions { static class InstGenHelper { - private static InstInfo[] _infoTbl; + private static readonly InstInfo[] InfoTable; static InstGenHelper() { - _infoTbl = new InstInfo[(int)Instruction.Count]; + InfoTable = new InstInfo[(int)Instruction.Count]; Add(Instruction.AtomicAdd, InstType.AtomicBinary, "atomicAdd"); Add(Instruction.AtomicAnd, InstType.AtomicBinary, "atomicAnd"); @@ -139,12 +139,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions private static void Add(Instruction inst, InstType flags, string opName = null, int precedence = 0) { - _infoTbl[(int)inst] = new InstInfo(flags, opName, precedence); + InfoTable[(int)inst] = new InstInfo(flags, opName, precedence); } public static InstInfo GetInstructionInfo(Instruction inst) { - return _infoTbl[(int)(inst & Instruction.Mask)]; + return InfoTable[(int)(inst & Instruction.Mask)]; } public static string GetSoureExpr(CodeGenContext context, IAstNode node, VariableType dstType) @@ -191,7 +191,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions return false; } - InstInfo info = _infoTbl[(int)(operation.Inst & Instruction.Mask)]; + InstInfo info = InfoTable[(int)(operation.Inst & Instruction.Mask)]; if ((info.Type & (InstType.Call | InstType.Special)) != 0) { diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs index 6805f2faa..094040013 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs @@ -85,13 +85,13 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions string ApplyScaling(string vector) { - if ((context.Config.Stage.SupportsRenderScale()) && + if (context.Config.Stage.SupportsRenderScale() && texOp.Inst == Instruction.ImageLoad && !isBindless 
&& !isIndexed) { // Image scales start after texture ones. - int scaleIndex = context.Config.GetTextureDescriptors().Length + context.FindImageDescriptorIndex(texOp); + int scaleIndex = context.Config.GetTextureDescriptors().Length + context.Config.FindImageDescriptorIndex(texOp); if (pCount == 3 && isArray) { @@ -621,11 +621,11 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions { if (intCoords) { - if ((context.Config.Stage.SupportsRenderScale()) && + if (context.Config.Stage.SupportsRenderScale() && !isBindless && !isIndexed) { - int index = context.FindTextureDescriptorIndex(texOp); + int index = context.Config.FindTextureDescriptorIndex(texOp); if (pCount == 3 && isArray) { @@ -762,7 +762,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions } else { - (TextureDescriptor descriptor, int descriptorIndex) = context.FindTextureDescriptor(texOp); + (TextureDescriptor descriptor, int descriptorIndex) = context.Config.FindTextureDescriptor(texOp); bool hasLod = !descriptor.Type.HasFlag(SamplerType.Multisample) && descriptor.Type != SamplerType.TextureBuffer; string texCall; @@ -780,6 +780,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions } if (context.Config.Stage.SupportsRenderScale() && + (texOp.Index < 2 || (texOp.Type & SamplerType.Mask) == SamplerType.Texture3D) && !isBindless && !isIndexed) { diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs index 334c744d7..74dfd0236 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs @@ -11,7 +11,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { class OperandManager { - private static string[] _stagePrefixes = new string[] { "cp", "vp", "tcp", "tep", "gp", "fp" }; + private static readonly string[] StagePrefixes = new string[] { "cp", "vp", "tcp", "tep", "gp", "fp" }; private struct BuiltInAttribute { @@ -26,8 +26,7 @@ namespace 
Ryujinx.Graphics.Shader.CodeGen.Glsl } } - private static Dictionary _builtInAttributes = - new Dictionary() + private static Dictionary _builtInAttributes = new Dictionary() { { AttributeConsts.TessLevelOuter0, new BuiltInAttribute("gl_TessLevelOuter[0]", VariableType.F32) }, { AttributeConsts.TessLevelOuter1, new BuiltInAttribute("gl_TessLevelOuter[1]", VariableType.F32) }, @@ -276,7 +275,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl string name = builtInAttr.Name; - if (!perPatch && IsArrayAttribute(config.Stage, isOutAttr) && IsArrayBuiltIn(value)) + if (!perPatch && IsArrayAttribute(config.Stage, isOutAttr) && AttributeInfo.IsArrayBuiltIn(value)) { name = isOutAttr ? $"gl_out[gl_InvocationID].{name}" : $"gl_in[{indexExpr}].{name}"; } @@ -318,18 +317,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl } } - private static bool IsArrayBuiltIn(int attr) - { - if (attr <= AttributeConsts.TessLevelInner1 || - attr == AttributeConsts.TessCoordX || - attr == AttributeConsts.TessCoordY) - { - return false; - } - - return (attr & AttributeConsts.SpecialMask) == 0; - } - public static string GetUbName(ShaderStage stage, int slot, bool cbIndexable) { if (cbIndexable) @@ -391,12 +378,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { int index = (int)stage; - if ((uint)index >= _stagePrefixes.Length) + if ((uint)index >= StagePrefixes.Length) { return "invalid"; } - return _stagePrefixes[index]; + return StagePrefixes[index]; } private static char GetSwizzleMask(int value) @@ -409,7 +396,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl return $"{DefaultNames.ArgumentNamePrefix}{argIndex}"; } - public static VariableType GetNodeDestType(CodeGenContext context, IAstNode node) + public static VariableType GetNodeDestType(CodeGenContext context, IAstNode node, bool isAsgDest = false) { if (node is AstOperation operation) { @@ -455,7 +442,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl return context.CurrentFunction.GetArgumentType(argIndex); } - return 
GetOperandVarType(operand); + return GetOperandVarType(context, operand, isAsgDest); } else { @@ -463,7 +450,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl } } - private static VariableType GetOperandVarType(AstOperand operand) + private static VariableType GetOperandVarType(CodeGenContext context, AstOperand operand, bool isAsgDest = false) { if (operand.Type == OperandType.Attribute) { @@ -471,6 +458,21 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { return builtInAttr.Type; } + else if (context.Config.Stage == ShaderStage.Vertex && !isAsgDest && + operand.Value >= AttributeConsts.UserAttributeBase && + operand.Value < AttributeConsts.UserAttributeEnd) + { + int location = (operand.Value - AttributeConsts.UserAttributeBase) / 16; + + AttributeType type = context.Config.GpuAccessor.QueryAttributeType(location); + + return type switch + { + AttributeType.Sint => VariableType.S32, + AttributeType.Uint => VariableType.U32, + _ => VariableType.F32 + }; + } } return OperandInfo.GetVarType(operand); diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs new file mode 100644 index 000000000..03d26553c --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs @@ -0,0 +1,554 @@ +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using Spv.Generator; +using System; +using System.Collections.Generic; +using System.Linq; +using static Spv.Specification; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + using IrConsts = IntermediateRepresentation.IrConsts; + using IrOperandType = IntermediateRepresentation.OperandType; + + partial class CodeGenContext : Module + { + private readonly StructuredProgramInfo _info; + + public ShaderConfig Config { get; } + + public int InputVertices { get; } + + public Dictionary UniformBuffers { get; } = new Dictionary(); + public Instruction SupportBuffer { get; set; } + public Instruction 
UniformBuffersArray { get; set; } + public Instruction StorageBuffersArray { get; set; } + public Instruction LocalMemory { get; set; } + public Instruction SharedMemory { get; set; } + public Instruction InputsArray { get; set; } + public Instruction OutputsArray { get; set; } + public Dictionary SamplersTypes { get; } = new Dictionary(); + public Dictionary Samplers { get; } = new Dictionary(); + public Dictionary Images { get; } = new Dictionary(); + public Dictionary Inputs { get; } = new Dictionary(); + public Dictionary Outputs { get; } = new Dictionary(); + public Dictionary InputsPerPatch { get; } = new Dictionary(); + public Dictionary OutputsPerPatch { get; } = new Dictionary(); + + public Instruction CoordTemp { get; set; } + private readonly Dictionary _locals = new Dictionary(); + private readonly Dictionary _localForArgs = new Dictionary(); + private readonly Dictionary _funcArgs = new Dictionary(); + private readonly Dictionary _functions = new Dictionary(); + + private class BlockState + { + private int _entryCount; + private readonly List _labels = new List(); + + public Instruction GetNextLabel(CodeGenContext context) + { + return GetLabel(context, _entryCount); + } + + public Instruction GetNextLabelAutoIncrement(CodeGenContext context) + { + return GetLabel(context, _entryCount++); + } + + public Instruction GetLabel(CodeGenContext context, int index) + { + while (index >= _labels.Count) + { + _labels.Add(context.Label()); + } + + return _labels[index]; + } + } + + private readonly Dictionary _labels = new Dictionary(); + + public Dictionary LoopTargets { get; set; } + + public AstBlock CurrentBlock { get; private set; } + + public SpirvDelegates Delegates { get; } + + public CodeGenContext( + StructuredProgramInfo info, + ShaderConfig config, + GeneratorPool instPool, + GeneratorPool integerPool) : base(0x00010300, instPool, integerPool) + { + _info = info; + Config = config; + + if (config.Stage == ShaderStage.Geometry) + { + InputTopology 
inPrimitive = config.GpuAccessor.QueryPrimitiveTopology(); + + InputVertices = inPrimitive switch + { + InputTopology.Points => 1, + InputTopology.Lines => 2, + InputTopology.LinesAdjacency => 2, + InputTopology.Triangles => 3, + InputTopology.TrianglesAdjacency => 3, + _ => throw new InvalidOperationException($"Invalid input topology \"{inPrimitive}\".") + }; + } + + AddCapability(Capability.Shader); + AddCapability(Capability.Float64); + + SetMemoryModel(AddressingModel.Logical, MemoryModel.GLSL450); + + Delegates = new SpirvDelegates(this); + } + + public void StartFunction() + { + _locals.Clear(); + _localForArgs.Clear(); + _funcArgs.Clear(); + } + + public void EnterBlock(AstBlock block) + { + CurrentBlock = block; + AddLabel(GetBlockStateLazy(block).GetNextLabelAutoIncrement(this)); + } + + public Instruction GetFirstLabel(AstBlock block) + { + return GetBlockStateLazy(block).GetLabel(this, 0); + } + + public Instruction GetNextLabel(AstBlock block) + { + return GetBlockStateLazy(block).GetNextLabel(this); + } + + private BlockState GetBlockStateLazy(AstBlock block) + { + if (!_labels.TryGetValue(block, out var blockState)) + { + blockState = new BlockState(); + + _labels.Add(block, blockState); + } + + return blockState; + } + + public Instruction NewBlock() + { + var label = Label(); + Branch(label); + AddLabel(label); + return label; + } + + public Instruction[] GetMainInterface() + { + var mainInterface = new List(); + + mainInterface.AddRange(Inputs.Values); + mainInterface.AddRange(Outputs.Values); + mainInterface.AddRange(InputsPerPatch.Values); + mainInterface.AddRange(OutputsPerPatch.Values); + + if (InputsArray != null) + { + mainInterface.Add(InputsArray); + } + + if (OutputsArray != null) + { + mainInterface.Add(OutputsArray); + } + + return mainInterface.ToArray(); + } + + public void DeclareLocal(AstOperand local, Instruction spvLocal) + { + _locals.Add(local, spvLocal); + } + + public void DeclareLocalForArgs(int funcIndex, Instruction[] 
spvLocals) + { + _localForArgs.Add(funcIndex, spvLocals); + } + + public void DeclareArgument(int argIndex, Instruction spvLocal) + { + _funcArgs.Add(argIndex, spvLocal); + } + + public void DeclareFunction(int funcIndex, StructuredFunction function, Instruction spvFunc) + { + _functions.Add(funcIndex, (function, spvFunc)); + } + + public Instruction GetFP32(IAstNode node) + { + return Get(AggregateType.FP32, node); + } + + public Instruction GetFP64(IAstNode node) + { + return Get(AggregateType.FP64, node); + } + + public Instruction GetS32(IAstNode node) + { + return Get(AggregateType.S32, node); + } + + public Instruction GetU32(IAstNode node) + { + return Get(AggregateType.U32, node); + } + + public Instruction Get(AggregateType type, IAstNode node) + { + if (node is AstOperation operation) + { + var opResult = Instructions.Generate(this, operation); + return BitcastIfNeeded(type, opResult.Type, opResult.Value); + } + else if (node is AstOperand operand) + { + return operand.Type switch + { + IrOperandType.Argument => GetArgument(type, operand), + IrOperandType.Attribute => GetAttribute(type, operand.Value & AttributeConsts.Mask, (operand.Value & AttributeConsts.LoadOutputMask) != 0), + IrOperandType.AttributePerPatch => GetAttributePerPatch(type, operand.Value & AttributeConsts.Mask, (operand.Value & AttributeConsts.LoadOutputMask) != 0), + IrOperandType.Constant => GetConstant(type, operand), + IrOperandType.ConstantBuffer => GetConstantBuffer(type, operand), + IrOperandType.LocalVariable => GetLocal(type, operand), + IrOperandType.Undefined => Undef(GetType(type)), + _ => throw new ArgumentException($"Invalid operand type \"{operand.Type}\".") + }; + } + + throw new NotImplementedException(node.GetType().Name); + } + + public Instruction GetAttributeElemPointer(int attr, bool isOutAttr, Instruction index, out AggregateType elemType) + { + var storageClass = isOutAttr ? 
StorageClass.Output : StorageClass.Input; + var attrInfo = AttributeInfo.From(Config, attr, isOutAttr); + + int attrOffset = attrInfo.BaseValue; + AggregateType type = attrInfo.Type; + + Instruction ioVariable, elemIndex; + + bool isUserAttr = attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd; + + if (isUserAttr && + ((!isOutAttr && Config.UsedFeatures.HasFlag(FeatureFlags.IaIndexing)) || + (isOutAttr && Config.UsedFeatures.HasFlag(FeatureFlags.OaIndexing)))) + { + elemType = AggregateType.FP32; + ioVariable = isOutAttr ? OutputsArray : InputsArray; + elemIndex = Constant(TypeU32(), attrInfo.GetInnermostIndex()); + var vecIndex = Constant(TypeU32(), (attr - AttributeConsts.UserAttributeBase) >> 4); + + if (Config.Stage == ShaderStage.Geometry && !isOutAttr) + { + return AccessChain(TypePointer(storageClass, GetType(elemType)), ioVariable, index, vecIndex, elemIndex); + } + else + { + return AccessChain(TypePointer(storageClass, GetType(elemType)), ioVariable, vecIndex, elemIndex); + } + } + + bool isViewportInverse = attr == AttributeConsts.SupportBlockViewInverseX || attr == AttributeConsts.SupportBlockViewInverseY; + + if (isViewportInverse) + { + elemType = AggregateType.FP32; + elemIndex = Constant(TypeU32(), (attr - AttributeConsts.SupportBlockViewInverseX) >> 2); + return AccessChain(TypePointer(StorageClass.Uniform, TypeFP32()), SupportBuffer, Constant(TypeU32(), 2), elemIndex); + } + + elemType = attrInfo.Type & AggregateType.ElementTypeMask; + + if (isUserAttr && Config.TransformFeedbackEnabled && + ((isOutAttr && Config.Stage != ShaderStage.Fragment) || + (!isOutAttr && Config.Stage != ShaderStage.Vertex))) + { + attrOffset = attr; + type = elemType; + } + + ioVariable = isOutAttr ? 
Outputs[attrOffset] : Inputs[attrOffset]; + + if ((type & (AggregateType.Array | AggregateType.Vector)) == 0) + { + return ioVariable; + } + + elemIndex = Constant(TypeU32(), attrInfo.GetInnermostIndex()); + + if (Config.Stage == ShaderStage.Geometry && !isOutAttr && (!attrInfo.IsBuiltin || AttributeInfo.IsArrayBuiltIn(attr))) + { + return AccessChain(TypePointer(storageClass, GetType(elemType)), ioVariable, index, elemIndex); + } + else + { + return AccessChain(TypePointer(storageClass, GetType(elemType)), ioVariable, elemIndex); + } + } + + public Instruction GetAttributeElemPointer(Instruction attrIndex, bool isOutAttr, Instruction index, out AggregateType elemType) + { + var storageClass = isOutAttr ? StorageClass.Output : StorageClass.Input; + + elemType = AggregateType.FP32; + var ioVariable = isOutAttr ? OutputsArray : InputsArray; + var vecIndex = ShiftRightLogical(TypeS32(), attrIndex, Constant(TypeS32(), 2)); + var elemIndex = BitwiseAnd(TypeS32(), attrIndex, Constant(TypeS32(), 3)); + + if (Config.Stage == ShaderStage.Geometry && !isOutAttr) + { + return AccessChain(TypePointer(storageClass, GetType(elemType)), ioVariable, index, vecIndex, elemIndex); + } + else + { + return AccessChain(TypePointer(storageClass, GetType(elemType)), ioVariable, vecIndex, elemIndex); + } + } + + public Instruction GetAttribute(AggregateType type, int attr, bool isOutAttr, Instruction index = null) + { + if (!AttributeInfo.Validate(Config, attr, isOutAttr: false)) + { + return GetConstant(type, new AstOperand(IrOperandType.Constant, 0)); + } + + var elemPointer = GetAttributeElemPointer(attr, isOutAttr, index, out var elemType); + var value = Load(GetType(elemType), elemPointer); + + if (Config.Stage == ShaderStage.Fragment) + { + if (attr == AttributeConsts.PositionX || attr == AttributeConsts.PositionY) + { + var pointerType = TypePointer(StorageClass.Uniform, TypeFP32()); + var fieldIndex = Constant(TypeU32(), 4); + var scaleIndex = Constant(TypeU32(), 0); + + var 
scaleElemPointer = AccessChain(pointerType, SupportBuffer, fieldIndex, scaleIndex);
+                    var scale = Load(TypeFP32(), scaleElemPointer);
+
+                    value = FDiv(TypeFP32(), value, scale);
+                }
+                else if (attr == AttributeConsts.FrontFacing && Config.GpuAccessor.QueryHostHasFrontFacingBug())
+                {
+                    // Workaround for what appears to be a bug on Intel compiler.
+                    var valueFloat = Select(TypeFP32(), value, Constant(TypeFP32(), 1f), Constant(TypeFP32(), 0f));
+                    var valueAsInt = Bitcast(TypeS32(), valueFloat);
+                    var valueNegated = SNegate(TypeS32(), valueAsInt);
+
+                    value = SLessThan(TypeBool(), valueNegated, Constant(TypeS32(), 0));
+                }
+            }
+
+            return BitcastIfNeeded(type, elemType, value);
+        }
+
+        /// <summary>
+        /// Gets a pointer to the per-patch attribute <paramref name="attr"/>, or to one of its
+        /// elements when the attribute type is an array or a vector.
+        /// </summary>
+        /// <param name="attr">Attribute value, used to look up the attribute information</param>
+        /// <param name="isOutAttr">True to use the per-patch outputs, false to use the per-patch inputs</param>
+        /// <param name="elemType">Attribute type masked with <see cref="AggregateType.ElementTypeMask"/></param>
+        /// <returns>The per-patch variable itself for non-aggregate attributes, an access chain otherwise</returns>
+        public Instruction GetAttributePerPatchElemPointer(int attr, bool isOutAttr, out AggregateType elemType)
+        {
+            var storageClass = isOutAttr ? StorageClass.Output : StorageClass.Input;
+            var attrInfo = AttributeInfo.From(Config, attr, isOutAttr);
+
+            elemType = attrInfo.Type & AggregateType.ElementTypeMask;
+
+            // The per-patch dictionaries are indexed with the attribute base value.
+            int attrOffset = attrInfo.BaseValue;
+            Instruction ioVariable = isOutAttr ? OutputsPerPatch[attrOffset] : InputsPerPatch[attrOffset];
+
+            if ((attrInfo.Type & (AggregateType.Array | AggregateType.Vector)) == 0)
+            {
+                // Not an aggregate, the variable is already the element.
+                return ioVariable;
+            }
+
+            var elemIndex = Constant(TypeU32(), attrInfo.GetInnermostIndex());
+            return AccessChain(TypePointer(storageClass, GetType(elemType)), ioVariable, elemIndex);
+        }
+
+        /// <summary>
+        /// Loads the per-patch attribute <paramref name="attr"/> and converts the loaded value to <paramref name="type"/>.
+        /// </summary>
+        /// <param name="type">Type of the returned value</param>
+        /// <param name="attr">Attribute value</param>
+        /// <param name="isOutAttr">True to load from the per-patch outputs, false to load from the per-patch inputs</param>
+        /// <returns>The loaded value, or a constant built from the value 0 when the attribute fails validation</returns>
+        public Instruction GetAttributePerPatch(AggregateType type, int attr, bool isOutAttr)
+        {
+            // NOTE(review): validation always passes isOutAttr: false, even when isOutAttr is true. Confirm this is intended.
+            if (!AttributeInfo.Validate(Config, attr, isOutAttr: false))
+            {
+                return GetConstant(type, new AstOperand(IrOperandType.Constant, 0));
+            }
+
+            var elemPointer = GetAttributePerPatchElemPointer(attr, isOutAttr, out var elemType);
+            return BitcastIfNeeded(type, elemType, Load(GetType(elemType), elemPointer));
+        }
+
+        /// <summary>
+        /// Loads an attribute selected by the index instruction <paramref name="attr"/>
+        /// and converts the loaded value to <paramref name="type"/>.
+        /// </summary>
+        /// <param name="type">Type of the returned value</param>
+        /// <param name="attr">Instruction producing the attribute index</param>
+        /// <param name="isOutAttr">True to load from the outputs, false to load from the inputs</param>
+        /// <param name="index">Optional index forwarded to the element pointer lookup</param>
+        /// <returns>The loaded value, converted to <paramref name="type"/> if needed</returns>
+        public Instruction GetAttribute(AggregateType type, Instruction attr, bool isOutAttr, Instruction index = null)
+        {
+            var elemPointer = GetAttributeElemPointer(attr, isOutAttr, index, out var elemType);
+            return BitcastIfNeeded(type, elemType, Load(GetType(elemType), elemPointer));
+        }
+
+        public Instruction GetConstant(AggregateType type, AstOperand operand)
+        {
+            return type switch
+            {
+                AggregateType.Bool => operand.Value != 0 ?
ConstantTrue(TypeBool()) : ConstantFalse(TypeBool()), + AggregateType.FP32 => Constant(TypeFP32(), BitConverter.Int32BitsToSingle(operand.Value)), + AggregateType.FP64 => Constant(TypeFP64(), (double)BitConverter.Int32BitsToSingle(operand.Value)), + AggregateType.S32 => Constant(TypeS32(), operand.Value), + AggregateType.U32 => Constant(TypeU32(), (uint)operand.Value), + _ => throw new ArgumentException($"Invalid type \"{type}\".") + }; + } + + public Instruction GetConstantBuffer(AggregateType type, AstOperand operand) + { + var i1 = Constant(TypeS32(), 0); + var i2 = Constant(TypeS32(), operand.CbufOffset >> 2); + var i3 = Constant(TypeU32(), operand.CbufOffset & 3); + + Instruction elemPointer; + + if (UniformBuffersArray != null) + { + var ubVariable = UniformBuffersArray; + var i0 = Constant(TypeS32(), operand.CbufSlot); + + elemPointer = AccessChain(TypePointer(StorageClass.Uniform, TypeFP32()), ubVariable, i0, i1, i2, i3); + } + else + { + var ubVariable = UniformBuffers[operand.CbufSlot]; + + elemPointer = AccessChain(TypePointer(StorageClass.Uniform, TypeFP32()), ubVariable, i1, i2, i3); + } + + return BitcastIfNeeded(type, AggregateType.FP32, Load(TypeFP32(), elemPointer)); + } + + public Instruction GetLocalPointer(AstOperand local) + { + return _locals[local]; + } + + public Instruction[] GetLocalForArgsPointers(int funcIndex) + { + return _localForArgs[funcIndex]; + } + + public Instruction GetArgumentPointer(AstOperand funcArg) + { + return _funcArgs[funcArg.Value]; + } + + public Instruction GetLocal(AggregateType dstType, AstOperand local) + { + var srcType = local.VarType.Convert(); + return BitcastIfNeeded(dstType, srcType, Load(GetType(srcType), GetLocalPointer(local))); + } + + public Instruction GetArgument(AggregateType dstType, AstOperand funcArg) + { + var srcType = funcArg.VarType.Convert(); + return BitcastIfNeeded(dstType, srcType, Load(GetType(srcType), GetArgumentPointer(funcArg))); + } + + public (StructuredFunction, Instruction) 
GetFunction(int funcIndex) + { + return _functions[funcIndex]; + } + + public TransformFeedbackOutput GetTransformFeedbackOutput(int location, int component) + { + int index = (AttributeConsts.UserAttributeBase / 4) + location * 4 + component; + return _info.TransformFeedbackOutputs[index]; + } + + public TransformFeedbackOutput GetTransformFeedbackOutput(int location) + { + int index = location / 4; + return _info.TransformFeedbackOutputs[index]; + } + + public Instruction GetType(AggregateType type, int length = 1) + { + if (type.HasFlag(AggregateType.Array)) + { + return TypeArray(GetType(type & ~AggregateType.Array), Constant(TypeU32(), length)); + } + else if (type.HasFlag(AggregateType.Vector)) + { + return TypeVector(GetType(type & ~AggregateType.Vector), length); + } + + return type switch + { + AggregateType.Void => TypeVoid(), + AggregateType.Bool => TypeBool(), + AggregateType.FP32 => TypeFP32(), + AggregateType.FP64 => TypeFP64(), + AggregateType.S32 => TypeS32(), + AggregateType.U32 => TypeU32(), + _ => throw new ArgumentException($"Invalid attribute type \"{type}\".") + }; + } + + public Instruction BitcastIfNeeded(AggregateType dstType, AggregateType srcType, Instruction value) + { + if (dstType == srcType) + { + return value; + } + + if (dstType == AggregateType.Bool) + { + return INotEqual(TypeBool(), BitcastIfNeeded(AggregateType.S32, srcType, value), Constant(TypeS32(), 0)); + } + else if (srcType == AggregateType.Bool) + { + var intTrue = Constant(TypeS32(), IrConsts.True); + var intFalse = Constant(TypeS32(), IrConsts.False); + + return BitcastIfNeeded(dstType, AggregateType.S32, Select(TypeS32(), value, intTrue, intFalse)); + } + else + { + return Bitcast(GetType(dstType, 1), value); + } + } + + public Instruction TypeS32() + { + return TypeInt(32, true); + } + + public Instruction TypeU32() + { + return TypeInt(32, false); + } + + public Instruction TypeFP32() + { + return TypeFloat(32); + } + + public Instruction TypeFP64() + { + return 
TypeFloat(64); + } + } +} diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs new file mode 100644 index 000000000..728e8053a --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs @@ -0,0 +1,712 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using Spv.Generator; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using static Spv.Specification; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + using SpvInstruction = Spv.Generator.Instruction; + + static class Declarations + { + // At least 16 attributes are guaranteed by the spec. + public const int MaxAttributes = 16; + + private static readonly string[] StagePrefixes = new string[] { "cp", "vp", "tcp", "tep", "gp", "fp" }; + + public static void DeclareParameters(CodeGenContext context, StructuredFunction function) + { + DeclareParameters(context, function.InArguments, 0); + DeclareParameters(context, function.OutArguments, function.InArguments.Length); + } + + private static void DeclareParameters(CodeGenContext context, IEnumerable argTypes, int argIndex) + { + foreach (var argType in argTypes) + { + var argPointerType = context.TypePointer(StorageClass.Function, context.GetType(argType.Convert())); + var spvArg = context.FunctionParameter(argPointerType); + + context.DeclareArgument(argIndex++, spvArg); + } + } + + public static void DeclareLocals(CodeGenContext context, StructuredFunction function) + { + foreach (AstOperand local in function.Locals) + { + var localPointerType = context.TypePointer(StorageClass.Function, context.GetType(local.VarType.Convert())); + var spvLocal = context.Variable(localPointerType, StorageClass.Function); + + context.AddLocalVariable(spvLocal); + context.DeclareLocal(local, spvLocal); + } + + var ivector2Type = context.TypeVector(context.TypeS32(), 2); + var 
coordTempPointerType = context.TypePointer(StorageClass.Function, ivector2Type); + var coordTemp = context.Variable(coordTempPointerType, StorageClass.Function); + + context.AddLocalVariable(coordTemp); + context.CoordTemp = coordTemp; + } + + public static void DeclareLocalForArgs(CodeGenContext context, List functions) + { + for (int funcIndex = 0; funcIndex < functions.Count; funcIndex++) + { + StructuredFunction function = functions[funcIndex]; + SpvInstruction[] locals = new SpvInstruction[function.InArguments.Length]; + + for (int i = 0; i < function.InArguments.Length; i++) + { + var type = function.GetArgumentType(i).Convert(); + var localPointerType = context.TypePointer(StorageClass.Function, context.GetType(type)); + var spvLocal = context.Variable(localPointerType, StorageClass.Function); + + context.AddLocalVariable(spvLocal); + + locals[i] = spvLocal; + } + + context.DeclareLocalForArgs(funcIndex, locals); + } + } + + public static void DeclareAll(CodeGenContext context, StructuredProgramInfo info) + { + if (context.Config.Stage == ShaderStage.Compute) + { + int localMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeLocalMemorySize(), 4); + + if (localMemorySize != 0) + { + DeclareLocalMemory(context, localMemorySize); + } + + int sharedMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeSharedMemorySize(), 4); + + if (sharedMemorySize != 0) + { + DeclareSharedMemory(context, sharedMemorySize); + } + } + else if (context.Config.LocalMemorySize != 0) + { + int localMemorySize = BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4); + DeclareLocalMemory(context, localMemorySize); + } + + DeclareSupportBuffer(context); + DeclareUniformBuffers(context, context.Config.GetConstantBufferDescriptors()); + DeclareStorageBuffers(context, context.Config.GetStorageBufferDescriptors()); + DeclareSamplers(context, context.Config.GetTextureDescriptors()); + DeclareImages(context, context.Config.GetImageDescriptors()); + 
DeclareInputAttributes(context, info, perPatch: false); + DeclareOutputAttributes(context, info, perPatch: false); + DeclareInputAttributes(context, info, perPatch: true); + DeclareOutputAttributes(context, info, perPatch: true); + } + + private static void DeclareLocalMemory(CodeGenContext context, int size) + { + context.LocalMemory = DeclareMemory(context, StorageClass.Private, size); + } + + private static void DeclareSharedMemory(CodeGenContext context, int size) + { + context.SharedMemory = DeclareMemory(context, StorageClass.Workgroup, size); + } + + private static SpvInstruction DeclareMemory(CodeGenContext context, StorageClass storage, int size) + { + var arrayType = context.TypeArray(context.TypeU32(), context.Constant(context.TypeU32(), size)); + var pointerType = context.TypePointer(storage, arrayType); + var variable = context.Variable(pointerType, storage); + + context.AddGlobalVariable(variable); + + return variable; + } + + private static void DeclareSupportBuffer(CodeGenContext context) + { + if (!context.Config.Stage.SupportsRenderScale() && !(context.Config.LastInVertexPipeline && context.Config.GpuAccessor.QueryViewportTransformDisable())) + { + return; + } + + var isBgraArrayType = context.TypeArray(context.TypeU32(), context.Constant(context.TypeU32(), SupportBuffer.FragmentIsBgraCount)); + var viewportInverseVectorType = context.TypeVector(context.TypeFP32(), 4); + var renderScaleArrayType = context.TypeArray(context.TypeFP32(), context.Constant(context.TypeU32(), SupportBuffer.RenderScaleMaxCount)); + + context.Decorate(isBgraArrayType, Decoration.ArrayStride, (LiteralInteger)SupportBuffer.FieldSize); + context.Decorate(renderScaleArrayType, Decoration.ArrayStride, (LiteralInteger)SupportBuffer.FieldSize); + + var supportBufferStructType = context.TypeStruct(false, context.TypeU32(), isBgraArrayType, viewportInverseVectorType, context.TypeS32(), renderScaleArrayType); + + context.MemberDecorate(supportBufferStructType, 0, Decoration.Offset, 
(LiteralInteger)SupportBuffer.FragmentAlphaTestOffset); + context.MemberDecorate(supportBufferStructType, 1, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentIsBgraOffset); + context.MemberDecorate(supportBufferStructType, 2, Decoration.Offset, (LiteralInteger)SupportBuffer.ViewportInverseOffset); + context.MemberDecorate(supportBufferStructType, 3, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentRenderScaleCountOffset); + context.MemberDecorate(supportBufferStructType, 4, Decoration.Offset, (LiteralInteger)SupportBuffer.GraphicsRenderScaleOffset); + context.Decorate(supportBufferStructType, Decoration.Block); + + var supportBufferPointerType = context.TypePointer(StorageClass.Uniform, supportBufferStructType); + var supportBufferVariable = context.Variable(supportBufferPointerType, StorageClass.Uniform); + + context.Decorate(supportBufferVariable, Decoration.DescriptorSet, (LiteralInteger)0); + context.Decorate(supportBufferVariable, Decoration.Binding, (LiteralInteger)0); + + context.AddGlobalVariable(supportBufferVariable); + + context.SupportBuffer = supportBufferVariable; + } + + private static void DeclareUniformBuffers(CodeGenContext context, BufferDescriptor[] descriptors) + { + if (descriptors.Length == 0) + { + return; + } + + uint ubSize = Constants.ConstantBufferSize / 16; + + var ubArrayType = context.TypeArray(context.TypeVector(context.TypeFP32(), 4), context.Constant(context.TypeU32(), ubSize), true); + context.Decorate(ubArrayType, Decoration.ArrayStride, (LiteralInteger)16); + var ubStructType = context.TypeStruct(true, ubArrayType); + context.Decorate(ubStructType, Decoration.Block); + context.MemberDecorate(ubStructType, 0, Decoration.Offset, (LiteralInteger)0); + + if (context.Config.UsedFeatures.HasFlag(FeatureFlags.CbIndexing)) + { + int count = descriptors.Max(x => x.Slot) + 1; + + var ubStructArrayType = context.TypeArray(ubStructType, context.Constant(context.TypeU32(), count)); + var ubPointerType = 
context.TypePointer(StorageClass.Uniform, ubStructArrayType); + var ubVariable = context.Variable(ubPointerType, StorageClass.Uniform); + + context.Name(ubVariable, $"{GetStagePrefix(context.Config.Stage)}_u"); + context.Decorate(ubVariable, Decoration.DescriptorSet, (LiteralInteger)0); + context.Decorate(ubVariable, Decoration.Binding, (LiteralInteger)context.Config.FirstConstantBufferBinding); + context.AddGlobalVariable(ubVariable); + + context.UniformBuffersArray = ubVariable; + } + else + { + var ubPointerType = context.TypePointer(StorageClass.Uniform, ubStructType); + + foreach (var descriptor in descriptors) + { + var ubVariable = context.Variable(ubPointerType, StorageClass.Uniform); + + context.Name(ubVariable, $"{GetStagePrefix(context.Config.Stage)}_c{descriptor.Slot}"); + context.Decorate(ubVariable, Decoration.DescriptorSet, (LiteralInteger)0); + context.Decorate(ubVariable, Decoration.Binding, (LiteralInteger)descriptor.Binding); + context.AddGlobalVariable(ubVariable); + context.UniformBuffers.Add(descriptor.Slot, ubVariable); + } + } + } + + private static void DeclareStorageBuffers(CodeGenContext context, BufferDescriptor[] descriptors) + { + if (descriptors.Length == 0) + { + return; + } + + int setIndex = context.Config.Options.TargetApi == TargetApi.Vulkan ? 
1 : 0; + int count = descriptors.Max(x => x.Slot) + 1; + + var sbArrayType = context.TypeRuntimeArray(context.TypeU32()); + context.Decorate(sbArrayType, Decoration.ArrayStride, (LiteralInteger)4); + var sbStructType = context.TypeStruct(true, sbArrayType); + context.Decorate(sbStructType, Decoration.BufferBlock); + context.MemberDecorate(sbStructType, 0, Decoration.Offset, (LiteralInteger)0); + var sbStructArrayType = context.TypeArray(sbStructType, context.Constant(context.TypeU32(), count)); + var sbPointerType = context.TypePointer(StorageClass.Uniform, sbStructArrayType); + var sbVariable = context.Variable(sbPointerType, StorageClass.Uniform); + + context.Name(sbVariable, $"{GetStagePrefix(context.Config.Stage)}_s"); + context.Decorate(sbVariable, Decoration.DescriptorSet, (LiteralInteger)setIndex); + context.Decorate(sbVariable, Decoration.Binding, (LiteralInteger)context.Config.FirstStorageBufferBinding); + context.AddGlobalVariable(sbVariable); + + context.StorageBuffersArray = sbVariable; + } + + private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors) + { + foreach (var descriptor in descriptors) + { + var meta = new TextureMeta(descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Format); + + if (context.Samplers.ContainsKey(meta)) + { + continue; + } + + int setIndex = context.Config.Options.TargetApi == TargetApi.Vulkan ? 
2 : 0; + + var dim = (descriptor.Type & SamplerType.Mask) switch + { + SamplerType.Texture1D => Dim.Dim1D, + SamplerType.Texture2D => Dim.Dim2D, + SamplerType.Texture3D => Dim.Dim3D, + SamplerType.TextureCube => Dim.Cube, + SamplerType.TextureBuffer => Dim.Buffer, + _ => throw new InvalidOperationException($"Invalid sampler type \"{descriptor.Type & SamplerType.Mask}\".") + }; + + var imageType = context.TypeImage( + context.TypeFP32(), + dim, + descriptor.Type.HasFlag(SamplerType.Shadow), + descriptor.Type.HasFlag(SamplerType.Array), + descriptor.Type.HasFlag(SamplerType.Multisample), + 1, + ImageFormat.Unknown); + + var nameSuffix = meta.CbufSlot < 0 ? $"_tcb_{meta.Handle:X}" : $"_cb{meta.CbufSlot}_{meta.Handle:X}"; + + var sampledImageType = context.TypeSampledImage(imageType); + var sampledImagePointerType = context.TypePointer(StorageClass.UniformConstant, sampledImageType); + var sampledImageVariable = context.Variable(sampledImagePointerType, StorageClass.UniformConstant); + + context.Samplers.Add(meta, (imageType, sampledImageType, sampledImageVariable)); + context.SamplersTypes.Add(meta, descriptor.Type); + + context.Name(sampledImageVariable, $"{GetStagePrefix(context.Config.Stage)}_tex{nameSuffix}"); + context.Decorate(sampledImageVariable, Decoration.DescriptorSet, (LiteralInteger)setIndex); + context.Decorate(sampledImageVariable, Decoration.Binding, (LiteralInteger)descriptor.Binding); + context.AddGlobalVariable(sampledImageVariable); + } + } + + private static void DeclareImages(CodeGenContext context, TextureDescriptor[] descriptors) + { + foreach (var descriptor in descriptors) + { + var meta = new TextureMeta(descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Format); + + if (context.Images.ContainsKey(meta)) + { + continue; + } + + int setIndex = context.Config.Options.TargetApi == TargetApi.Vulkan ? 
3 : 0; + + var dim = GetDim(descriptor.Type); + + var imageType = context.TypeImage( + context.GetType(meta.Format.GetComponentType().Convert()), + dim, + descriptor.Type.HasFlag(SamplerType.Shadow), + descriptor.Type.HasFlag(SamplerType.Array), + descriptor.Type.HasFlag(SamplerType.Multisample), + AccessQualifier.ReadWrite, + GetImageFormat(meta.Format)); + + var nameSuffix = meta.CbufSlot < 0 ? + $"_tcb_{meta.Handle:X}_{meta.Format.ToGlslFormat()}" : + $"_cb{meta.CbufSlot}_{meta.Handle:X}_{meta.Format.ToGlslFormat()}"; + + var imagePointerType = context.TypePointer(StorageClass.UniformConstant, imageType); + var imageVariable = context.Variable(imagePointerType, StorageClass.UniformConstant); + + context.Images.Add(meta, (imageType, imageVariable)); + + context.Name(imageVariable, $"{GetStagePrefix(context.Config.Stage)}_img{nameSuffix}"); + context.Decorate(imageVariable, Decoration.DescriptorSet, (LiteralInteger)setIndex); + context.Decorate(imageVariable, Decoration.Binding, (LiteralInteger)descriptor.Binding); + + if (descriptor.Flags.HasFlag(TextureUsageFlags.ImageCoherent)) + { + context.Decorate(imageVariable, Decoration.Coherent); + } + + context.AddGlobalVariable(imageVariable); + } + } + + private static Dim GetDim(SamplerType type) + { + return (type & SamplerType.Mask) switch + { + SamplerType.Texture1D => Dim.Dim1D, + SamplerType.Texture2D => Dim.Dim2D, + SamplerType.Texture3D => Dim.Dim3D, + SamplerType.TextureCube => Dim.Cube, + SamplerType.TextureBuffer => Dim.Buffer, + _ => throw new ArgumentException($"Invalid sampler type \"{type & SamplerType.Mask}\".") + }; + } + + private static ImageFormat GetImageFormat(TextureFormat format) + { + return format switch + { + TextureFormat.Unknown => ImageFormat.Unknown, + TextureFormat.R8Unorm => ImageFormat.R8, + TextureFormat.R8Snorm => ImageFormat.R8Snorm, + TextureFormat.R8Uint => ImageFormat.R8ui, + TextureFormat.R8Sint => ImageFormat.R8i, + TextureFormat.R16Float => ImageFormat.R16f, + 
TextureFormat.R16Unorm => ImageFormat.R16,
+                TextureFormat.R16Snorm => ImageFormat.R16Snorm,
+                TextureFormat.R16Uint => ImageFormat.R16ui,
+                TextureFormat.R16Sint => ImageFormat.R16i,
+                TextureFormat.R32Float => ImageFormat.R32f,
+                TextureFormat.R32Uint => ImageFormat.R32ui,
+                TextureFormat.R32Sint => ImageFormat.R32i,
+                TextureFormat.R8G8Unorm => ImageFormat.Rg8,
+                TextureFormat.R8G8Snorm => ImageFormat.Rg8Snorm,
+                TextureFormat.R8G8Uint => ImageFormat.Rg8ui,
+                TextureFormat.R8G8Sint => ImageFormat.Rg8i,
+                TextureFormat.R16G16Float => ImageFormat.Rg16f,
+                TextureFormat.R16G16Unorm => ImageFormat.Rg16,
+                TextureFormat.R16G16Snorm => ImageFormat.Rg16Snorm,
+                TextureFormat.R16G16Uint => ImageFormat.Rg16ui,
+                TextureFormat.R16G16Sint => ImageFormat.Rg16i,
+                TextureFormat.R32G32Float => ImageFormat.Rg32f,
+                TextureFormat.R32G32Uint => ImageFormat.Rg32ui,
+                TextureFormat.R32G32Sint => ImageFormat.Rg32i,
+                TextureFormat.R8G8B8A8Unorm => ImageFormat.Rgba8,
+                TextureFormat.R8G8B8A8Snorm => ImageFormat.Rgba8Snorm,
+                TextureFormat.R8G8B8A8Uint => ImageFormat.Rgba8ui,
+                TextureFormat.R8G8B8A8Sint => ImageFormat.Rgba8i,
+                TextureFormat.R16G16B16A16Float => ImageFormat.Rgba16f,
+                TextureFormat.R16G16B16A16Unorm => ImageFormat.Rgba16,
+                TextureFormat.R16G16B16A16Snorm => ImageFormat.Rgba16Snorm,
+                TextureFormat.R16G16B16A16Uint => ImageFormat.Rgba16ui,
+                TextureFormat.R16G16B16A16Sint => ImageFormat.Rgba16i,
+                TextureFormat.R32G32B32A32Float => ImageFormat.Rgba32f,
+                TextureFormat.R32G32B32A32Uint => ImageFormat.Rgba32ui,
+                TextureFormat.R32G32B32A32Sint => ImageFormat.Rgba32i,
+                TextureFormat.R10G10B10A2Unorm => ImageFormat.Rgb10A2,
+                TextureFormat.R10G10B10A2Uint => ImageFormat.Rgb10a2ui,
+                TextureFormat.R11G11B10Float => ImageFormat.R11fG11fB10f,
+                _ => throw new ArgumentException($"Invalid texture format \"{format}\".")
+            };
+        }
+
+        // Declares the input variables for every valid attribute in info.Inputs (or
+        // info.InputsPerPatch when perPatch is set). When the shader uses indexed input
+        // access, non-patch user attributes share one vec4 array stored in
+        // context.InputsArray; every other attribute gets its own variable.
+        private static void DeclareInputAttributes(CodeGenContext context, StructuredProgramInfo info, bool perPatch)
+        {
+            bool indexedAccess = context.Config.UsedFeatures.HasFlag(FeatureFlags.IaIndexing);
+
+            foreach (int attr in perPatch ? info.InputsPerPatch : info.Inputs)
+            {
+                if (!AttributeInfo.Validate(context.Config, attr, isOutAttr: false))
+                {
+                    continue;
+                }
+
+                bool isUserAttr = attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd;
+
+                if (!indexedAccess || !isUserAttr || perPatch)
+                {
+                    // Only fragment shader user attributes carry an interpolation qualifier.
+                    bool hasImap = context.Config.Stage == ShaderStage.Fragment && isUserAttr;
+
+                    PixelImap iq = hasImap
+                        ? context.Config.ImapTypes[(attr - AttributeConsts.UserAttributeBase) / 16].GetFirstUsedType()
+                        : PixelImap.Unused;
+
+                    DeclareInputOrOutput(context, attr, perPatch, isOutAttr: false, iq);
+
+                    continue;
+                }
+
+                // The shared array is declared once, on the first indexed user attribute.
+                if (context.InputsArray == null)
+                {
+                    var vec4Type = context.TypeVector(context.TypeFP32(), (LiteralInteger)4);
+                    var arrayType = context.TypeArray(vec4Type, context.Constant(context.TypeU32(), (LiteralInteger)MaxAttributes));
+
+                    // Geometry shader inputs get an extra outer dimension of InputVertices elements.
+                    if (context.Config.Stage == ShaderStage.Geometry)
+                    {
+                        arrayType = context.TypeArray(arrayType, context.Constant(context.TypeU32(), (LiteralInteger)context.InputVertices));
+                    }
+
+                    var pointerType = context.TypePointer(StorageClass.Input, arrayType);
+                    var arrayVariable = context.Variable(pointerType, StorageClass.Input);
+
+                    if (context.Config.PassthroughAttributes != 0 && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
+                    {
+                        context.Decorate(arrayVariable, Decoration.PassthroughNV);
+                    }
+
+                    context.Decorate(arrayVariable, Decoration.Location, (LiteralInteger)0);
+
+                    context.AddGlobalVariable(arrayVariable);
+                    context.InputsArray = arrayVariable;
+                }
+            }
+        }
+
+        // Declares the output variables for every valid attribute in info.Outputs (or
+        // info.OutputsPerPatch when perPatch is set).
+        private static void DeclareOutputAttributes(CodeGenContext context, StructuredProgramInfo info, bool perPatch)
+        {
+            bool oaIndexing = context.Config.UsedFeatures.HasFlag(FeatureFlags.OaIndexing);
+            var outputs = perPatch ?
info.OutputsPerPatch : info.Outputs;
+
+            foreach (int attr in outputs)
+            {
+                if (!AttributeInfo.Validate(context.Config, attr, isOutAttr: true))
+                {
+                    continue;
+                }
+
+                bool isUserAttr = attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd;
+
+                if (oaIndexing && isUserAttr && !perPatch)
+                {
+                    // Indexed output access: all user attributes share one vec4 array, declared once.
+                    if (context.OutputsArray == null)
+                    {
+                        var attrType = context.TypeVector(context.TypeFP32(), (LiteralInteger)4);
+                        attrType = context.TypeArray(attrType, context.Constant(context.TypeU32(), (LiteralInteger)MaxAttributes));
+
+                        var spvType = context.TypePointer(StorageClass.Output, attrType);
+                        var spvVar = context.Variable(spvType, StorageClass.Output);
+
+                        context.Decorate(spvVar, Decoration.Location, (LiteralInteger)0);
+
+                        context.AddGlobalVariable(spvVar);
+                        context.OutputsArray = spvVar;
+                    }
+                }
+                else
+                {
+                    DeclareOutputAttribute(context, attr, perPatch);
+                }
+            }
+
+            // Vertex shaders always get a position output, whether or not it is listed.
+            if (context.Config.Stage == ShaderStage.Vertex)
+            {
+                DeclareOutputAttribute(context, AttributeConsts.PositionX, perPatch: false);
+            }
+        }
+
+        // Declares a single output attribute through DeclareInputOrOutput.
+        private static void DeclareOutputAttribute(CodeGenContext context, int attr, bool perPatch)
+            => DeclareInputOrOutput(context, attr, perPatch, isOutAttr: true);
+
+        // Declares the AttributeConsts.LaneId attribute as a non-patch input.
+        public static void DeclareInvocationId(CodeGenContext context)
+            => DeclareInputOrOutput(context, AttributeConsts.LaneId, perPatch: false, isOutAttr: false);
+
+        // Declares the variable backing one attribute and registers it, keyed by the
+        // attribute base value, in the matching context dictionary (Inputs, Outputs,
+        // InputsPerPatch or OutputsPerPatch). Does nothing if that key is already present.
+        // iq is the interpolation qualifier applied to user inputs.
+        private static void DeclareInputOrOutput(CodeGenContext context, int attr, bool perPatch, bool isOutAttr, PixelImap iq = PixelImap.Unused)
+        {
+            bool isUserAttr = attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd;
+
+            // With transform feedback enabled, non-patch user attributes are declared one
+            // component at a time by the other overload. Fragment outputs and vertex
+            // inputs are excluded from this.
+            if (isUserAttr && context.Config.TransformFeedbackEnabled && !perPatch &&
+                (isOutAttr ? context.Config.Stage != ShaderStage.Fragment : context.Config.Stage != ShaderStage.Vertex))
+            {
+                DeclareInputOrOutput(context, attr, (attr >> 2) & 3, isOutAttr, iq);
+                return;
+            }
+
+            var declared = isOutAttr
+                ? (perPatch ? context.OutputsPerPatch : context.Outputs)
+                : (perPatch ? context.InputsPerPatch : context.Inputs);
+
+            var info = AttributeInfo.From(context.Config, attr, isOutAttr);
+
+            if (declared.ContainsKey(info.BaseValue))
+            {
+                return;
+            }
+
+            var storage = isOutAttr ? StorageClass.Output : StorageClass.Input;
+            var valueType = context.GetType(info.Type, info.Length);
+            bool passthroughBuiltIn = false;
+
+            bool isGeometryInput = context.Config.Stage == ShaderStage.Geometry && !isOutAttr;
+
+            // Geometry shader inputs (other than non-array built-ins) are arrays with one element per input vertex.
+            if (isGeometryInput && (!info.IsBuiltin || AttributeInfo.IsArrayBuiltIn(attr)))
+            {
+                valueType = context.TypeArray(valueType, context.Constant(context.TypeU32(), (LiteralInteger)context.InputVertices));
+
+                passthroughBuiltIn = context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough();
+            }
+
+            var pointerType = context.TypePointer(storage, valueType);
+            var variable = context.Variable(pointerType, storage);
+
+            if (perPatch)
+            {
+                context.Decorate(variable, Decoration.Patch);
+            }
+
+            if (passthroughBuiltIn)
+            {
+                context.Decorate(variable, Decoration.PassthroughNV);
+            }
+
+            if (info.IsBuiltin)
+            {
+                context.Decorate(variable, Decoration.BuiltIn, (LiteralInteger)GetBuiltIn(context, info.BaseValue));
+
+                if (context.Config.TransformFeedbackEnabled && isOutAttr)
+                {
+                    var xfb = context.GetTransformFeedbackOutput(info.BaseValue);
+                    if (xfb.Valid)
+                    {
+                        context.Decorate(variable, Decoration.XfbBuffer, (LiteralInteger)xfb.Buffer);
+                        context.Decorate(variable, Decoration.XfbStride, (LiteralInteger)xfb.Stride);
+                        context.Decorate(variable, Decoration.Offset, (LiteralInteger)xfb.Offset);
+                    }
+                }
+            }
+            else if (isUserAttr)
+            {
+                // Each user attribute spans 16 bytes; per-patch attributes are offset by 32 locations.
+                int location = (attr - AttributeConsts.UserAttributeBase) / 16 + (perPatch ? 32 : 0);
+
+                context.Decorate(variable, Decoration.Location, (LiteralInteger)location);
+
+                if (!isOutAttr)
+                {
+                    if (!perPatch &&
+                        (context.Config.PassthroughAttributes & (1 << location)) != 0 &&
+                        context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
+                    {
+                        context.Decorate(variable, Decoration.PassthroughNV);
+                    }
+
+                    if (iq == PixelImap.Constant)
+                    {
+                        context.Decorate(variable, Decoration.Flat);
+                    }
+                    else if (iq == PixelImap.ScreenLinear)
+                    {
+                        context.Decorate(variable, Decoration.NoPerspective);
+                    }
+                }
+            }
+            else if (attr >= AttributeConsts.FragmentOutputColorBase && attr < AttributeConsts.FragmentOutputColorEnd)
+            {
+                int colorIndex = (attr - AttributeConsts.FragmentOutputColorBase) / 16;
+                context.Decorate(variable, Decoration.Location, (LiteralInteger)colorIndex);
+            }
+
+            context.AddGlobalVariable(variable);
+            declared.Add(info.BaseValue, variable);
+        }
+
+        // Declares a single component of a user attribute as its own scalar variable,
+        // keyed by the attribute value itself in context.Inputs or context.Outputs.
+        private static void DeclareInputOrOutput(CodeGenContext context, int attr, int component, bool isOutAttr, PixelImap iq = PixelImap.Unused)
+        {
+            var dict = isOutAttr ? context.Outputs : context.Inputs;
+            var attrInfo = AttributeInfo.From(context.Config, attr, isOutAttr);
+
+            if (dict.ContainsKey(attr))
+            {
+                return;
+            }
+
+            var storageClass = isOutAttr ?
StorageClass.Output : StorageClass.Input;
+            var attrType = context.GetType(attrInfo.Type & AggregateType.ElementTypeMask);
+
+            if (context.Config.Stage == ShaderStage.Geometry && !isOutAttr && (!attrInfo.IsBuiltin || AttributeInfo.IsArrayBuiltIn(attr)))
+            {
+                attrType = context.TypeArray(attrType, context.Constant(context.TypeU32(), (LiteralInteger)context.InputVertices));
+            }
+
+            var spvType = context.TypePointer(storageClass, attrType);
+            var spvVar = context.Variable(spvType, storageClass);
+
+            Debug.Assert(attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd);
+            int location = (attr - AttributeConsts.UserAttributeBase) / 16;
+
+            context.Decorate(spvVar, Decoration.Location, (LiteralInteger)location);
+            context.Decorate(spvVar, Decoration.Component, (LiteralInteger)component);
+
+            if (isOutAttr)
+            {
+                var tfOutput = context.GetTransformFeedbackOutput(location, component);
+                if (tfOutput.Valid)
+                {
+                    context.Decorate(spvVar, Decoration.XfbBuffer, (LiteralInteger)tfOutput.Buffer);
+                    context.Decorate(spvVar, Decoration.XfbStride, (LiteralInteger)tfOutput.Stride);
+                    context.Decorate(spvVar, Decoration.Offset, (LiteralInteger)tfOutput.Offset);
+                }
+            }
+            else
+            {
+                if ((context.Config.PassthroughAttributes & (1 << location)) != 0 &&
+                    context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
+                {
+                    context.Decorate(spvVar, Decoration.PassthroughNV);
+                }
+
+                switch (iq)
+                {
+                    case PixelImap.Constant:
+                        context.Decorate(spvVar, Decoration.Flat);
+                        break;
+                    case PixelImap.ScreenLinear:
+                        context.Decorate(spvVar, Decoration.NoPerspective);
+                        break;
+                }
+            }
+
+            context.AddGlobalVariable(spvVar);
+            dict.Add(attr, spvVar);
+        }
+
+        // Maps an attribute base value to the SPIR-V built-in it is decorated with.
+        // PositionX maps to FragCoord in fragment shaders and to Position in every other
+        // stage. Throws ArgumentException for attributes that have no mapping here.
+        private static BuiltIn GetBuiltIn(CodeGenContext context, int attr) => attr switch
+        {
+            AttributeConsts.TessLevelOuter0 => BuiltIn.TessLevelOuter,
+            AttributeConsts.TessLevelInner0 => BuiltIn.TessLevelInner,
+            AttributeConsts.Layer => BuiltIn.Layer,
+            AttributeConsts.ViewportIndex => BuiltIn.ViewportIndex,
+            AttributeConsts.PointSize => BuiltIn.PointSize,
+            AttributeConsts.PositionX => context.Config.Stage == ShaderStage.Fragment ? BuiltIn.FragCoord : BuiltIn.Position,
+            AttributeConsts.ClipDistance0 => BuiltIn.ClipDistance,
+            AttributeConsts.PointCoordX => BuiltIn.PointCoord,
+            AttributeConsts.TessCoordX => BuiltIn.TessCoord,
+            AttributeConsts.InstanceId => BuiltIn.InstanceId, // FIXME: Invalid
+            AttributeConsts.VertexId => BuiltIn.VertexId, // FIXME: Invalid
+            AttributeConsts.FrontFacing => BuiltIn.FrontFacing,
+            AttributeConsts.FragmentOutputDepth => BuiltIn.FragDepth,
+            AttributeConsts.ThreadKill => BuiltIn.HelperInvocation,
+            AttributeConsts.ThreadIdX => BuiltIn.LocalInvocationId,
+            AttributeConsts.CtaIdX => BuiltIn.WorkgroupId,
+            AttributeConsts.LaneId => BuiltIn.SubgroupLocalInvocationId,
+            AttributeConsts.InvocationId => BuiltIn.InvocationId,
+            AttributeConsts.PrimitiveId => BuiltIn.PrimitiveId,
+            AttributeConsts.PatchVerticesIn => BuiltIn.PatchVertices,
+            AttributeConsts.EqMask => BuiltIn.SubgroupEqMask,
+            AttributeConsts.GeMask => BuiltIn.SubgroupGeMask,
+            AttributeConsts.GtMask => BuiltIn.SubgroupGtMask,
+            AttributeConsts.LeMask => BuiltIn.SubgroupLeMask,
+            AttributeConsts.LtMask => BuiltIn.SubgroupLtMask,
+            AttributeConsts.SupportBlockViewInverseX => BuiltIn.Position,
+            AttributeConsts.SupportBlockViewInverseY => BuiltIn.Position,
+            _ => throw new ArgumentException($"Invalid attribute number 0x{attr:X}.")
+        };
+
+        // Looks up the variable name prefix for a shader stage in StagePrefixes.
+        private static string GetStagePrefix(ShaderStage stage) => StagePrefixes[(int)stage];
+    }
+}
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/EnumConversion.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/EnumConversion.cs
new file mode 100644
index 000000000..0ddb42640
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/EnumConversion.cs
@@ -0,0 +1,38 @@
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using static Spv.Specification;
+
+namespace
Ryujinx.Graphics.Shader.CodeGen.Spirv
+{
+    // Conversions from shader translator enums to the types used by the SPIR-V generator.
+    static class EnumConversion
+    {
+        // Converts a structured IR variable type to the matching aggregate type.
+        // Throws ArgumentException for values not listed below.
+        public static AggregateType Convert(this VariableType type)
+        {
+            return type switch
+            {
+                VariableType.None => AggregateType.Void,
+                VariableType.Bool => AggregateType.Bool,
+                VariableType.F32 => AggregateType.FP32,
+                VariableType.F64 => AggregateType.FP64,
+                VariableType.S32 => AggregateType.S32,
+                VariableType.U32 => AggregateType.U32,
+                _ => throw new ArgumentException($"Invalid variable type \"{type}\".")
+            };
+        }
+
+        // Converts a shader stage to the SPIR-V execution model of its entry point.
+        // Throws ArgumentException for values not listed below.
+        public static ExecutionModel Convert(this ShaderStage stage)
+        {
+            return stage switch
+            {
+                ShaderStage.Compute => ExecutionModel.GLCompute,
+                ShaderStage.Vertex => ExecutionModel.Vertex,
+                ShaderStage.TessellationControl => ExecutionModel.TessellationControl,
+                ShaderStage.TessellationEvaluation => ExecutionModel.TessellationEvaluation,
+                ShaderStage.Geometry => ExecutionModel.Geometry,
+                ShaderStage.Fragment => ExecutionModel.Fragment,
+                _ => throw new ArgumentException($"Invalid shader stage \"{stage}\".")
+            };
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
new file mode 100644
index 000000000..4c9e27e50
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
@@ -0,0 +1,2230 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using static Spv.Specification;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
+{
+    using SpvInstruction = Spv.Generator.Instruction;
+    using SpvLiteralInteger = Spv.Generator.LiteralInteger;
+
+    // Generates SPIR-V for structured IR operations by dispatching each instruction to
+    // its handler through a lookup table.
+    static class Instructions
+    {
+        // Handler table indexed by (instruction & Instruction.Mask). Entries with no
+        // registered handler stay null.
+        private static readonly Func<CodeGenContext, AstOperation, OperationResult>[] InstTable;
+
+        // Registers the handler of every supported instruction.
+        static Instructions()
+        {
+            InstTable = new Func<CodeGenContext, AstOperation, OperationResult>[(int)Instruction.Count];
+
+            Add(Instruction.Absolute, GenerateAbsolute);
+            Add(Instruction.Add, GenerateAdd);
+            Add(Instruction.AtomicAdd, GenerateAtomicAdd);
+            Add(Instruction.AtomicAnd, GenerateAtomicAnd);
+            Add(Instruction.AtomicCompareAndSwap, GenerateAtomicCompareAndSwap);
+            Add(Instruction.AtomicMinS32, GenerateAtomicMinS32);
+            Add(Instruction.AtomicMinU32, GenerateAtomicMinU32);
+            Add(Instruction.AtomicMaxS32, GenerateAtomicMaxS32);
+            Add(Instruction.AtomicMaxU32, GenerateAtomicMaxU32);
+            Add(Instruction.AtomicOr, GenerateAtomicOr);
+            Add(Instruction.AtomicSwap, GenerateAtomicSwap);
+            Add(Instruction.AtomicXor, GenerateAtomicXor);
+            Add(Instruction.Ballot, GenerateBallot);
+            Add(Instruction.Barrier, GenerateBarrier);
+            Add(Instruction.BitCount, GenerateBitCount);
+            Add(Instruction.BitfieldExtractS32, GenerateBitfieldExtractS32);
+            Add(Instruction.BitfieldExtractU32, GenerateBitfieldExtractU32);
+            Add(Instruction.BitfieldInsert, GenerateBitfieldInsert);
+            Add(Instruction.BitfieldReverse, GenerateBitfieldReverse);
+            Add(Instruction.BitwiseAnd, GenerateBitwiseAnd);
+            Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr);
+            Add(Instruction.BitwiseNot, GenerateBitwiseNot);
+            Add(Instruction.BitwiseOr, GenerateBitwiseOr);
+            Add(Instruction.Call, GenerateCall);
+            Add(Instruction.Ceiling, GenerateCeiling);
+            Add(Instruction.Clamp, GenerateClamp);
+            Add(Instruction.ClampU32, GenerateClampU32);
+            Add(Instruction.Comment, GenerateComment);
+            Add(Instruction.CompareEqual, GenerateCompareEqual);
+            Add(Instruction.CompareGreater, GenerateCompareGreater);
+            Add(Instruction.CompareGreaterOrEqual, GenerateCompareGreaterOrEqual);
+            Add(Instruction.CompareGreaterOrEqualU32, GenerateCompareGreaterOrEqualU32);
+            Add(Instruction.CompareGreaterU32, GenerateCompareGreaterU32);
+            Add(Instruction.CompareLess, GenerateCompareLess);
+            Add(Instruction.CompareLessOrEqual, GenerateCompareLessOrEqual);
+            Add(Instruction.CompareLessOrEqualU32, GenerateCompareLessOrEqualU32);
+            Add(Instruction.CompareLessU32, GenerateCompareLessU32);
+            Add(Instruction.CompareNotEqual, GenerateCompareNotEqual);
+            Add(Instruction.ConditionalSelect, GenerateConditionalSelect);
+            Add(Instruction.ConvertFP32ToFP64, GenerateConvertFP32ToFP64);
+            Add(Instruction.ConvertFP32ToS32, GenerateConvertFP32ToS32);
+            Add(Instruction.ConvertFP32ToU32, GenerateConvertFP32ToU32);
+            Add(Instruction.ConvertFP64ToFP32, GenerateConvertFP64ToFP32);
+            Add(Instruction.ConvertFP64ToS32, GenerateConvertFP64ToS32);
+            Add(Instruction.ConvertFP64ToU32, GenerateConvertFP64ToU32);
+            Add(Instruction.ConvertS32ToFP32, GenerateConvertS32ToFP32);
+            Add(Instruction.ConvertS32ToFP64, GenerateConvertS32ToFP64);
+            Add(Instruction.ConvertU32ToFP32, GenerateConvertU32ToFP32);
+            Add(Instruction.ConvertU32ToFP64, GenerateConvertU32ToFP64);
+            Add(Instruction.Cosine, GenerateCosine);
+            Add(Instruction.Ddx, GenerateDdx);
+            Add(Instruction.Ddy, GenerateDdy);
+            Add(Instruction.Discard, GenerateDiscard);
+            Add(Instruction.Divide, GenerateDivide);
+            Add(Instruction.EmitVertex, GenerateEmitVertex);
+            Add(Instruction.EndPrimitive, GenerateEndPrimitive);
+            Add(Instruction.ExponentB2, GenerateExponentB2);
+            Add(Instruction.FSIBegin, GenerateFSIBegin);
+            Add(Instruction.FSIEnd, GenerateFSIEnd);
+            Add(Instruction.FindLSB, GenerateFindLSB);
+            Add(Instruction.FindMSBS32, GenerateFindMSBS32);
+            Add(Instruction.FindMSBU32, GenerateFindMSBU32);
+            Add(Instruction.Floor, GenerateFloor);
+            Add(Instruction.FusedMultiplyAdd, GenerateFusedMultiplyAdd);
+            Add(Instruction.GroupMemoryBarrier, GenerateGroupMemoryBarrier);
+            Add(Instruction.ImageAtomic, GenerateImageAtomic);
+            Add(Instruction.ImageLoad, GenerateImageLoad);
+            Add(Instruction.ImageStore, GenerateImageStore);
+            Add(Instruction.IsNan, GenerateIsNan);
+            Add(Instruction.LoadAttribute, GenerateLoadAttribute);
+            Add(Instruction.LoadConstant, GenerateLoadConstant);
+            Add(Instruction.LoadLocal, GenerateLoadLocal);
+            Add(Instruction.LoadShared, GenerateLoadShared);
+            Add(Instruction.LoadStorage, GenerateLoadStorage);
+            Add(Instruction.Lod, GenerateLod);
+            Add(Instruction.LogarithmB2, GenerateLogarithmB2);
+            Add(Instruction.LogicalAnd, GenerateLogicalAnd);
+            Add(Instruction.LogicalExclusiveOr, GenerateLogicalExclusiveOr);
+            Add(Instruction.LogicalNot, GenerateLogicalNot);
+            Add(Instruction.LogicalOr, GenerateLogicalOr);
+            Add(Instruction.LoopBreak, GenerateLoopBreak);
+            Add(Instruction.LoopContinue, GenerateLoopContinue);
+            Add(Instruction.Maximum, GenerateMaximum);
+            Add(Instruction.MaximumU32, GenerateMaximumU32);
+            Add(Instruction.MemoryBarrier, GenerateMemoryBarrier);
+            Add(Instruction.Minimum, GenerateMinimum);
+            Add(Instruction.MinimumU32, GenerateMinimumU32);
+            Add(Instruction.Multiply, GenerateMultiply);
+            Add(Instruction.MultiplyHighS32, GenerateMultiplyHighS32);
+            Add(Instruction.MultiplyHighU32, GenerateMultiplyHighU32);
+            Add(Instruction.Negate, GenerateNegate);
+            Add(Instruction.PackDouble2x32, GeneratePackDouble2x32);
+            Add(Instruction.PackHalf2x16, GeneratePackHalf2x16);
+            Add(Instruction.ReciprocalSquareRoot, GenerateReciprocalSquareRoot);
+            Add(Instruction.Return, GenerateReturn);
+            Add(Instruction.Round, GenerateRound);
+            Add(Instruction.ShiftLeft, GenerateShiftLeft);
+            Add(Instruction.ShiftRightS32, GenerateShiftRightS32);
+            Add(Instruction.ShiftRightU32, GenerateShiftRightU32);
+            Add(Instruction.Shuffle, GenerateShuffle);
+            Add(Instruction.ShuffleDown, GenerateShuffleDown);
+            Add(Instruction.ShuffleUp, GenerateShuffleUp);
+            Add(Instruction.ShuffleXor, GenerateShuffleXor);
+            Add(Instruction.Sine, GenerateSine);
+            Add(Instruction.SquareRoot, GenerateSquareRoot);
+            Add(Instruction.StoreAttribute, GenerateStoreAttribute);
+            Add(Instruction.StoreLocal, GenerateStoreLocal);
+            Add(Instruction.StoreShared, GenerateStoreShared);
+            Add(Instruction.StoreShared16, GenerateStoreShared16);
+            Add(Instruction.StoreShared8, GenerateStoreShared8);
+            Add(Instruction.StoreStorage, GenerateStoreStorage);
+            Add(Instruction.StoreStorage16, GenerateStoreStorage16);
+            Add(Instruction.StoreStorage8, GenerateStoreStorage8);
+            Add(Instruction.Subtract, GenerateSubtract);
+            Add(Instruction.SwizzleAdd, GenerateSwizzleAdd);
+            Add(Instruction.TextureSample, GenerateTextureSample);
+            Add(Instruction.TextureSize, GenerateTextureSize);
+            Add(Instruction.Truncate, GenerateTruncate);
+            Add(Instruction.UnpackDouble2x32, GenerateUnpackDouble2x32);
+            Add(Instruction.UnpackHalf2x16, GenerateUnpackHalf2x16);
+            Add(Instruction.VoteAll, GenerateVoteAll);
+            Add(Instruction.VoteAllEqual, GenerateVoteAllEqual);
+            Add(Instruction.VoteAny, GenerateVoteAny);
+        }
+
+        // Stores the handler in the table slot of the instruction, with flag bits masked off.
+        private static void Add(Instruction inst, Func<CodeGenContext, AstOperation, OperationResult> handler)
+        {
+            InstTable[(int)(inst & Instruction.Mask)] = handler;
+        }
+
+        // Generates code for an operation by invoking its registered handler.
+        // Throws NotImplementedException, naming the instruction, when none is registered.
+        public static OperationResult Generate(CodeGenContext context, AstOperation operation)
+        {
+            var handler = InstTable[(int)(operation.Inst & Instruction.Mask)];
+
+            if (handler == null)
+            {
+                throw new NotImplementedException(operation.Inst.ToString());
+            }
+
+            return handler(context, operation);
+        }
+
+        // Absolute value: GenerateUnary with the GlslFAbs and GlslSAbs emitters.
+        private static OperationResult GenerateAbsolute(CodeGenContext context, AstOperation operation)
+            => GenerateUnary(context, operation, context.Delegates.GlslFAbs, context.Delegates.GlslSAbs);
+
+        // Addition: GenerateBinary with the FAdd and IAdd emitters.
+        private static OperationResult GenerateAdd(CodeGenContext context, AstOperation operation)
+            => GenerateBinary(context, operation, context.Delegates.FAdd, context.Delegates.IAdd);
+
+        // Atomic add: GenerateAtomicMemoryBinary with the AtomicIAdd emitter.
+        private static OperationResult GenerateAtomicAdd(CodeGenContext context, AstOperation operation)
+            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicIAdd);
+
+        // Atomic bitwise AND: GenerateAtomicMemoryBinary with the AtomicAnd emitter.
+        private static OperationResult GenerateAtomicAnd(CodeGenContext context, AstOperation operation)
+            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicAnd);
+
+        // Atomic compare-and-swap: handled entirely by GenerateAtomicMemoryCas.
+        private static OperationResult GenerateAtomicCompareAndSwap(CodeGenContext context, AstOperation operation)
+            => GenerateAtomicMemoryCas(context, operation);
+
+        // Atomic signed minimum: GenerateAtomicMemoryBinary with the AtomicSMin emitter.
+        private static OperationResult GenerateAtomicMinS32(CodeGenContext context, AstOperation
operation)
+            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicSMin);
+
+        // Atomic unsigned minimum: GenerateAtomicMemoryBinary with the AtomicUMin emitter.
+        private static OperationResult GenerateAtomicMinU32(CodeGenContext context, AstOperation operation)
+            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicUMin);
+
+        // Atomic signed maximum: GenerateAtomicMemoryBinary with the AtomicSMax emitter.
+        private static OperationResult GenerateAtomicMaxS32(CodeGenContext context, AstOperation operation)
+            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicSMax);
+
+        // Atomic unsigned maximum: GenerateAtomicMemoryBinary with the AtomicUMax emitter.
+        private static OperationResult GenerateAtomicMaxU32(CodeGenContext context, AstOperation operation)
+            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicUMax);
+
+        // Atomic bitwise OR: GenerateAtomicMemoryBinary with the AtomicOr emitter.
+        private static OperationResult GenerateAtomicOr(CodeGenContext context, AstOperation operation)
+            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicOr);
+
+        // Atomic swap: GenerateAtomicMemoryBinary with the AtomicExchange emitter.
+        private static OperationResult GenerateAtomicSwap(CodeGenContext context, AstOperation operation)
+            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicExchange);
+
+        // Atomic bitwise XOR: GenerateAtomicMemoryBinary with the AtomicXor emitter.
+        private static OperationResult GenerateAtomicXor(CodeGenContext context, AstOperation operation)
+            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicXor);
+
+        // Emits a subgroup ballot of the boolean source and returns component 0 of the
+        // resulting uvec4 mask as a U32 value.
+        private static OperationResult GenerateBallot(CodeGenContext context, AstOperation operation)
+        {
+            var predicate = operation.GetSource(0);
+
+            var ballotType = context.TypeVector(context.TypeU32(), 4);
+            var scope = context.Constant(context.TypeU32(), 3); // Subgroup
+
+            var ballot = context.GroupNonUniformBallot(ballotType, scope, context.Get(AggregateType.Bool, predicate));
+            var lowWord = context.CompositeExtract(context.TypeU32(), ballot, (SpvLiteralInteger)0);
+
+            return new OperationResult(AggregateType.U32, lowWord);
+        }
+
+        // Emits a control barrier through context.ControlBarrier.
+        private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)
+        {
+            context.ControlBarrier(
+                context.Constant(context.TypeU32(),
2), // Execution scope: 2 is Workgroup in the SPIR-V Scope enumeration.
+                context.Constant(context.TypeU32(), 2), // Memory scope: Workgroup.
+                context.Constant(context.TypeU32(), 264)); // Semantics: 0x108 = AcquireRelease (0x8) | WorkgroupMemory (0x100).
+
+            return OperationResult.Invalid;
+        }
+
+        // Bit count: GenerateUnaryS32 with the BitCount emitter.
+        private static OperationResult GenerateBitCount(CodeGenContext context, AstOperation operation)
+            => GenerateUnaryS32(context, operation, context.Delegates.BitCount);
+
+        // Signed bitfield extract: GenerateTernaryS32 with the BitFieldSExtract emitter.
+        private static OperationResult GenerateBitfieldExtractS32(CodeGenContext context, AstOperation operation)
+            => GenerateTernaryS32(context, operation, context.Delegates.BitFieldSExtract);
+
+        // Unsigned bitfield extract: GenerateTernaryS32 with the BitFieldUExtract emitter.
+        private static OperationResult GenerateBitfieldExtractU32(CodeGenContext context, AstOperation operation)
+            => GenerateTernaryS32(context, operation, context.Delegates.BitFieldUExtract);
+
+        // Bitfield insert: GenerateQuaternaryS32 with the BitFieldInsert emitter.
+        private static OperationResult GenerateBitfieldInsert(CodeGenContext context, AstOperation operation)
+            => GenerateQuaternaryS32(context, operation, context.Delegates.BitFieldInsert);
+
+        // Bit reversal: GenerateUnaryS32 with the BitReverse emitter.
+        private static OperationResult GenerateBitfieldReverse(CodeGenContext context, AstOperation operation)
+            => GenerateUnaryS32(context, operation, context.Delegates.BitReverse);
+
+        // Bitwise AND: GenerateBinaryS32 with the BitwiseAnd emitter.
+        private static OperationResult GenerateBitwiseAnd(CodeGenContext context, AstOperation operation)
+            => GenerateBinaryS32(context, operation, context.Delegates.BitwiseAnd);
+
+        // Bitwise XOR: GenerateBinaryS32 with the BitwiseXor emitter.
+        private static OperationResult GenerateBitwiseExclusiveOr(CodeGenContext context, AstOperation operation)
+            => GenerateBinaryS32(context, operation, context.Delegates.BitwiseXor);
+
+        // Bitwise NOT: GenerateUnaryS32 with the Not emitter.
+        private static OperationResult GenerateBitwiseNot(CodeGenContext context, AstOperation operation)
+            => GenerateUnaryS32(context, operation, context.Delegates.Not);
+
+        // Bitwise OR: GenerateBinaryS32 with the BitwiseOr emitter.
+        private static OperationResult GenerateBitwiseOr(CodeGenContext context, AstOperation operation)
+            => GenerateBinaryS32(context, operation, context.Delegates.BitwiseOr);
+
+        // Emits a call to another shader function. Source 0 is the constant function
+        // index; the remaining sources are the arguments. Input arguments are evaluated
+        // and stored into the locals reserved for this callee, and those locals are
+        // passed. Arguments past the callee's input count are passed as pointers to the
+        // caller's local.
+        private static OperationResult GenerateCall(CodeGenContext context, AstOperation operation)
+        {
+            AstOperand funcId = (AstOperand)operation.GetSource(0);
+
+            Debug.Assert(funcId.Type == OperandType.Constant);
+
+            (var function, var spvFunc) = context.GetFunction(funcId.Value);
+
+            var args = new SpvInstruction[operation.SourcesCount - 1];
+            var spvLocals = context.GetLocalForArgsPointers(funcId.Value);
+
+            for (int i = 0; i < args.Length; i++)
+            {
+                var operand = operation.GetSource(i + 1);
+
+                if (i >= function.InArguments.Length)
+                {
+                    // Only pointer arguments need a concrete operand to take the address of.
+                    args[i] = context.GetLocalPointer((AstOperand)operand);
+                }
+                else
+                {
+                    // Input arguments may be any expression node, so no cast is applied here.
+                    var type = function.GetArgumentType(i).Convert();
+                    var value = context.Get(type, operand);
+                    var spvLocal = spvLocals[i];
+
+                    context.Store(spvLocal, value);
+
+                    args[i] = spvLocal;
+                }
+            }
+
+            var retType = function.ReturnType.Convert();
+            var result = context.FunctionCall(context.GetType(retType), spvFunc, args);
+            return new OperationResult(retType, result);
+        }
+
+        // Ceiling: GenerateUnary with the GlslCeil emitter and no integer emitter.
+        private static OperationResult GenerateCeiling(CodeGenContext context, AstOperation operation)
+            => GenerateUnary(context, operation, context.Delegates.GlslCeil, null);
+
+        // Clamp: GenerateTernary with the GlslFClamp and GlslSClamp emitters.
+        private static OperationResult GenerateClamp(CodeGenContext context, AstOperation operation)
+            => GenerateTernary(context, operation, context.Delegates.GlslFClamp, context.Delegates.GlslSClamp);
+
+        // Unsigned clamp: GenerateTernaryU32 with the GlslUClamp emitter.
+        private static OperationResult GenerateClampU32(CodeGenContext context, AstOperation operation)
+            => GenerateTernaryU32(context, operation, context.Delegates.GlslUClamp);
+
+        // Comments produce no code.
+        private static OperationResult GenerateComment(CodeGenContext context, AstOperation operation)
+            => OperationResult.Invalid;
+
+        // Equality comparison: GenerateCompare with the FOrdEqual and IEqual emitters.
+        private static OperationResult GenerateCompareEqual(CodeGenContext context, AstOperation operation)
+            => GenerateCompare(context, operation, context.Delegates.FOrdEqual, context.Delegates.IEqual);
+
+        // Greater-than comparison: GenerateCompare with the FOrdGreaterThan and SGreaterThan emitters.
+        private static OperationResult GenerateCompareGreater(CodeGenContext context, AstOperation operation)
+        {
+            return GenerateCompare(context, operation, context.Delegates.FOrdGreaterThan,
context.Delegates.SGreaterThan);
+        }
+
+        // Greater-or-equal comparison: GenerateCompare with the FOrdGreaterThanEqual and SGreaterThanEqual emitters.
+        private static OperationResult GenerateCompareGreaterOrEqual(CodeGenContext context, AstOperation operation)
+            => GenerateCompare(context, operation, context.Delegates.FOrdGreaterThanEqual, context.Delegates.SGreaterThanEqual);
+
+        // Unsigned greater-or-equal comparison: GenerateCompareU32 with the UGreaterThanEqual emitter.
+        private static OperationResult GenerateCompareGreaterOrEqualU32(CodeGenContext context, AstOperation operation)
+            => GenerateCompareU32(context, operation, context.Delegates.UGreaterThanEqual);
+
+        // Unsigned greater-than comparison: GenerateCompareU32 with the UGreaterThan emitter.
+        private static OperationResult GenerateCompareGreaterU32(CodeGenContext context, AstOperation operation)
+            => GenerateCompareU32(context, operation, context.Delegates.UGreaterThan);
+
+        // Less-than comparison: GenerateCompare with the FOrdLessThan and SLessThan emitters.
+        private static OperationResult GenerateCompareLess(CodeGenContext context, AstOperation operation)
+            => GenerateCompare(context, operation, context.Delegates.FOrdLessThan, context.Delegates.SLessThan);
+
+        // Less-or-equal comparison: GenerateCompare with the FOrdLessThanEqual and SLessThanEqual emitters.
+        private static OperationResult GenerateCompareLessOrEqual(CodeGenContext context, AstOperation operation)
+            => GenerateCompare(context, operation, context.Delegates.FOrdLessThanEqual, context.Delegates.SLessThanEqual);
+
+        // Unsigned less-or-equal comparison: GenerateCompareU32 with the ULessThanEqual emitter.
+        private static OperationResult GenerateCompareLessOrEqualU32(CodeGenContext context, AstOperation operation)
+            => GenerateCompareU32(context, operation, context.Delegates.ULessThanEqual);
+
+        // Unsigned less-than comparison: GenerateCompareU32 with the ULessThan emitter.
+        private static OperationResult GenerateCompareLessU32(CodeGenContext context, AstOperation operation)
+            => GenerateCompareU32(context, operation, context.Delegates.ULessThan);
+
+        // Inequality comparison: GenerateCompare with the FOrdNotEqual and INotEqual emitters.
+        private static OperationResult GenerateCompareNotEqual(CodeGenContext context, AstOperation operation)
+            => GenerateCompare(context, operation, context.Delegates.FOrdNotEqual, context.Delegates.INotEqual);
+
+        // Emits a select of source 1 or source 2 based on the boolean source 0. The
+        // operands are read as FP64 or FP32 when the instruction carries that flag,
+        // and as S32 otherwise.
+        private static OperationResult GenerateConditionalSelect(CodeGenContext context, AstOperation operation)
+        {
+            var condSource = operation.GetSource(0);
+            var trueSource = operation.GetSource(1);
+            var falseSource = operation.GetSource(2);
+
+            var cond = context.Get(AggregateType.Bool, condSource);
+
+            if (operation.Inst.HasFlag(Instruction.FP64))
+            {
+                var selected = context.Select(context.TypeFP64(), cond, context.GetFP64(trueSource), context.GetFP64(falseSource));
+
+                return new OperationResult(AggregateType.FP64, selected);
+            }
+
+            if (operation.Inst.HasFlag(Instruction.FP32))
+            {
+                var selected = context.Select(context.TypeFP32(), cond, context.GetFP32(trueSource), context.GetFP32(falseSource));
+
+                return new OperationResult(AggregateType.FP32, selected);
+            }
+
+            var selectedInt = context.Select(context.TypeS32(), cond, context.GetS32(trueSource), context.GetS32(falseSource));
+
+            return new OperationResult(AggregateType.S32, selectedInt);
+        }
+
+        // Converts an FP32 source to FP64 with FConvert.
+        private static OperationResult GenerateConvertFP32ToFP64(CodeGenContext context, AstOperation operation)
+        {
+            var operand = operation.GetSource(0);
+            var converted = context.FConvert(context.TypeFP64(), context.GetFP32(operand));
+
+            return new OperationResult(AggregateType.FP64, converted);
+        }
+
+        // Converts an FP32 source to S32 with ConvertFToS.
+        private static OperationResult GenerateConvertFP32ToS32(CodeGenContext context, AstOperation operation)
+        {
+            var operand = operation.GetSource(0);
+            var converted = context.ConvertFToS(context.TypeS32(), context.GetFP32(operand));
+
+            return new OperationResult(AggregateType.S32, converted);
+        }
+
+        // Converts an FP32 source to U32 with ConvertFToU.
+        private static OperationResult GenerateConvertFP32ToU32(CodeGenContext context, AstOperation operation)
+        {
+            var operand = operation.GetSource(0);
+            var converted = context.ConvertFToU(context.TypeU32(), context.GetFP32(operand));
+
+            return new OperationResult(AggregateType.U32, converted);
+        }
+
+        // Converts an FP64 source to FP32 with FConvert.
+        private static OperationResult GenerateConvertFP64ToFP32(CodeGenContext context, AstOperation operation)
+        {
+            var operand = operation.GetSource(0);
+            var converted = context.FConvert(context.TypeFP32(), context.GetFP64(operand));
+
+            return new OperationResult(AggregateType.FP32, converted);
+        }
+
+        // Converts an FP64 source to S32 with ConvertFToS.
+        private static OperationResult GenerateConvertFP64ToS32(CodeGenContext context, AstOperation operation)
+        {
+            var operand = operation.GetSource(0);
+            var converted = context.ConvertFToS(context.TypeS32(), context.GetFP64(operand));
+
+            return new OperationResult(AggregateType.S32, converted);
+        }
+
+        // Converts an FP64 source to U32 with ConvertFToU.
+        private static OperationResult
GenerateConvertFP64ToU32(CodeGenContext context, AstOperation operation)
        {
            var input = operation.GetSource(0);
            var converted = context.ConvertFToU(context.TypeU32(), context.GetFP64(input));

            return new OperationResult(AggregateType.U32, converted);
        }

        // Integer-to-float conversions: source 0 is read as S32/U32 and converted to FP32/FP64.

        private static OperationResult GenerateConvertS32ToFP32(CodeGenContext context, AstOperation operation)
        {
            var input = operation.GetSource(0);
            var converted = context.ConvertSToF(context.TypeFP32(), context.GetS32(input));

            return new OperationResult(AggregateType.FP32, converted);
        }

        private static OperationResult GenerateConvertS32ToFP64(CodeGenContext context, AstOperation operation)
        {
            var input = operation.GetSource(0);
            var converted = context.ConvertSToF(context.TypeFP64(), context.GetS32(input));

            return new OperationResult(AggregateType.FP64, converted);
        }

        private static OperationResult GenerateConvertU32ToFP32(CodeGenContext context, AstOperation operation)
        {
            var input = operation.GetSource(0);
            var converted = context.ConvertUToF(context.TypeFP32(), context.GetU32(input));

            return new OperationResult(AggregateType.FP32, converted);
        }

        private static OperationResult GenerateConvertU32ToFP64(CodeGenContext context, AstOperation operation)
        {
            var input = operation.GetSource(0);
            var converted = context.ConvertUToF(context.TypeFP64(), context.GetU32(input));

            return new OperationResult(AggregateType.FP64, converted);
        }

        // Forwards to GenerateUnary with the GlslCos emitter; no second emitter is supplied (null).
        private static OperationResult GenerateCosine(CodeGenContext context, AstOperation operation)
            => GenerateUnary(context, operation, context.Delegates.GlslCos, null);

        // Derivatives forward to GenerateUnaryFP32 with the DPdx / DPdy emitters.
        private static OperationResult GenerateDdx(CodeGenContext context, AstOperation operation)
            => GenerateUnaryFP32(context, operation, context.Delegates.DPdx);

        private static OperationResult GenerateDdy(CodeGenContext context, AstOperation operation)
            => GenerateUnaryFP32(context, operation, context.Delegates.DPdy);

        private static OperationResult GenerateDiscard(CodeGenContext context, AstOperation operation)
        {
            context.Kill();
            return
OperationResult.Invalid;
        }

        // Forwards to GenerateBinary with the FDiv and SDiv emitters.
        private static OperationResult GenerateDivide(CodeGenContext context, AstOperation operation)
            => GenerateBinary(context, operation, context.Delegates.FDiv, context.Delegates.SDiv);

        // Emits the vertex; the operation itself produces no value.
        private static OperationResult GenerateEmitVertex(CodeGenContext context, AstOperation operation)
        {
            context.EmitVertex();

            return OperationResult.Invalid;
        }

        // Ends the current primitive; the operation itself produces no value.
        private static OperationResult GenerateEndPrimitive(CodeGenContext context, AstOperation operation)
        {
            context.EndPrimitive();

            return OperationResult.Invalid;
        }

        // Forwards to GenerateUnary with the GlslExp2 emitter; no second emitter is supplied (null).
        private static OperationResult GenerateExponentB2(CodeGenContext context, AstOperation operation)
            => GenerateUnary(context, operation, context.Delegates.GlslExp2, null);

        // Begins the invocation interlock, but only when the GPU accessor reports host support for
        // fragment shader interlock; otherwise nothing is emitted.
        private static OperationResult GenerateFSIBegin(CodeGenContext context, AstOperation operation)
        {
            bool hostHasInterlock = context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock();

            if (hostHasInterlock)
            {
                context.BeginInvocationInterlockEXT();
            }

            return OperationResult.Invalid;
        }

        // Ends the invocation interlock under the same host-support condition as GenerateFSIBegin.
        private static OperationResult GenerateFSIEnd(CodeGenContext context, AstOperation operation)
        {
            bool hostHasInterlock = context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock();

            if (hostHasInterlock)
            {
                context.EndInvocationInterlockEXT();
            }

            return OperationResult.Invalid;
        }

        // Reads source 0 as U32 and emits GlslFindILsb with a U32 result.
        private static OperationResult GenerateFindLSB(CodeGenContext context, AstOperation operation)
        {
            var operand = context.GetU32(operation.GetSource(0));
            var bitIndex = context.GlslFindILsb(context.TypeU32(), operand);

            return new OperationResult(AggregateType.U32, bitIndex);
        }

        // Reads source 0 as S32 and emits GlslFindSMsb; the result is typed U32.
        private static OperationResult GenerateFindMSBS32(CodeGenContext context, AstOperation operation)
        {
            var operand = context.GetS32(operation.GetSource(0));
            var bitIndex = context.GlslFindSMsb(context.TypeU32(), operand);

            return new OperationResult(AggregateType.U32, bitIndex);
        }

        private static OperationResult GenerateFindMSBU32(CodeGenContext context, AstOperation operation)
        {
            var source = context.GetU32(operation.GetSource(0));
            return new
OperationResult(AggregateType.U32, context.GlslFindUMsb(context.TypeU32(), source));
        }

        // Forwards to GenerateUnary with the GlslFloor emitter; no second emitter is supplied (null).
        private static OperationResult GenerateFloor(CodeGenContext context, AstOperation operation)
        {
            return GenerateUnary(context, operation, context.Delegates.GlslFloor, null);
        }

        // Forwards to GenerateTernary with the GlslFma emitter; no second emitter is supplied (null).
        private static OperationResult GenerateFusedMultiplyAdd(CodeGenContext context, AstOperation operation)
        {
            return GenerateTernary(context, operation, context.Delegates.GlslFma, null);
        }

        /// <summary>
        /// Emits a memory barrier with the constant operands 2 and 3400.
        /// </summary>
        private static OperationResult GenerateGroupMemoryBarrier(CodeGenContext context, AstOperation operation)
        {
            // NOTE(review): assuming the operands are (scope, semantics) as in SPIR-V OpMemoryBarrier,
            // 2 is Scope.Workgroup and 3400 (0xD48) is AcquireRelease | UniformMemory | WorkgroupMemory |
            // AtomicCounterMemory | ImageMemory. Confirm against the MemoryBarrier emitter signature.
            context.MemoryBarrier(context.Constant(context.TypeU32(), 2), context.Constant(context.TypeU32(), 3400));
            return OperationResult.Invalid;
        }

        /// <summary>
        /// Returns the constant zero whose type matches an image component type:
        /// S32 zero, U32 zero, or FP32 zero for any other component type.
        /// </summary>
        private static SpvInstruction ZeroOfComponentType(CodeGenContext context, VariableType componentType)
        {
            return componentType switch
            {
                VariableType.S32 => context.Constant(context.TypeS32(), 0),
                VariableType.U32 => context.Constant(context.TypeU32(), 0u),
                _ => context.Constant(context.TypeFP32(), 0f),
            };
        }

        /// <summary>
        /// Reads <paramref name="count"/> consecutive S32 sources through <paramref name="nextSource"/> and
        /// returns them as one coordinate value: an S32 vector when more than one component is needed,
        /// otherwise the single scalar source.
        /// </summary>
        private static SpvInstruction BuildImageCoordinates(CodeGenContext context, int count, Func<AggregateType, SpvInstruction> nextSource)
        {
            if (count > 1)
            {
                SpvInstruction[] elems = new SpvInstruction[count];

                for (int i = 0; i < count; i++)
                {
                    elems[i] = nextSource(AggregateType.S32);
                }

                var vectorType = context.TypeVector(context.TypeS32(), count);

                return context.CompositeConstruct(vectorType, elems);
            }

            return nextSource(AggregateType.S32);
        }

        /// <summary>
        /// Emits an atomic operation on an image texel. The operation is chosen from the
        /// <see cref="TextureFlags.AtomicMask"/> bits of the texture flags; unrecognised values fall back to
        /// an atomic add. Bindless operations are not supported and yield a typed zero constant.
        /// </summary>
        /// <returns>The value produced by the atomic instruction, typed after the image component type.</returns>
        private static OperationResult GenerateImageAtomic(CodeGenContext context, AstOperation operation)
        {
            AstTextureOperation texOp = (AstTextureOperation)operation;

            bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;

            var componentType = texOp.Format.GetComponentType();

            // TODO: Bindless texture support. For now we just return 0/do nothing.
            if (isBindless)
            {
                return new OperationResult(componentType.Convert(), ZeroOfComponentType(context, componentType));
            }

            bool isArray = (texOp.Type & SamplerType.Array) != 0;
            bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;

            // Bindless operations returned above, so the sources always start at index 0 here.
            int srcIndex = 0;

            SpvInstruction Src(AggregateType type)
            {
                return context.Get(type, texOp.GetSource(srcIndex++));
            }

            if (isIndexed)
            {
                // The index source is still consumed so the following sources line up; its value is not used.
                Src(AggregateType.S32);
            }

            int pCount = texOp.Type.GetDimensions() + (isArray ? 1 : 0);

            SpvInstruction pCoords = BuildImageCoordinates(context, pCount, Src);

            SpvInstruction value = Src(componentType.Convert());

            (var imageType, var imageVariable) = context.Images[new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format)];

            // The loaded image is not referenced below (the atomic goes through the texel pointer); the call
            // is kept so that this cleanup does not change what is emitted.
            context.Load(imageType, imageVariable);

            SpvInstruction resultType = context.GetType(componentType.Convert());
            SpvInstruction imagePointerType = context.TypePointer(StorageClass.Image, resultType);

            var pointer = context.ImageTexelPointer(imagePointerType, imageVariable, pCoords, context.Constant(context.TypeU32(), 0));

            // NOTE(review): presumably the Scope (1 = Device) and Memory Semantics (0 = None) operands of the
            // SPIR-V atomic instructions - confirm against the emitter signatures.
            var one = context.Constant(context.TypeU32(), 1);
            var zero = context.Constant(context.TypeU32(), 0);

            var result = (texOp.Flags & TextureFlags.AtomicMask) switch
            {
                TextureFlags.Add => context.AtomicIAdd(resultType, pointer, one, zero, value),
                TextureFlags.Minimum => componentType == VariableType.S32
                    ? context.AtomicSMin(resultType, pointer, one, zero, value)
                    : context.AtomicUMin(resultType, pointer, one, zero, value),
                TextureFlags.Maximum => componentType == VariableType.S32
                    ? context.AtomicSMax(resultType, pointer, one, zero, value)
                    : context.AtomicUMax(resultType, pointer, one, zero, value),
                TextureFlags.Increment => context.AtomicIIncrement(resultType, pointer, one, zero),
                TextureFlags.Decrement => context.AtomicIDecrement(resultType, pointer, one, zero),
                TextureFlags.BitwiseAnd => context.AtomicAnd(resultType, pointer, one, zero, value),
                TextureFlags.BitwiseOr => context.AtomicOr(resultType, pointer, one, zero, value),
                TextureFlags.BitwiseXor => context.AtomicXor(resultType, pointer, one, zero, value),
                TextureFlags.Swap => context.AtomicExchange(resultType, pointer, one, zero, value),
                // CAS reads one extra source; the source read earlier ("value") is passed last.
                TextureFlags.CAS => context.AtomicCompareExchange(resultType, pointer, one, zero, zero, Src(componentType.Convert()), value),
                _ => context.AtomicIAdd(resultType, pointer, one, zero, value),
            };

            return new OperationResult(componentType.Convert(), result);
        }

        /// <summary>
        /// Emits an image read at integer coordinates (after <c>ScalingHelpers.ApplyScaling</c>) and extracts
        /// the component selected by the operation's <c>Index</c> from the 4-component texel.
        /// Bindless operations are not supported and yield a typed zero constant.
        /// </summary>
        private static OperationResult GenerateImageLoad(CodeGenContext context, AstOperation operation)
        {
            AstTextureOperation texOp = (AstTextureOperation)operation;

            bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;

            var componentType = texOp.Format.GetComponentType();

            // TODO: Bindless texture support. For now we just return 0/do nothing.
            if (isBindless)
            {
                var zero = ZeroOfComponentType(context, componentType);

                return new OperationResult(componentType.Convert(), zero);
            }

            bool isArray = (texOp.Type & SamplerType.Array) != 0;
            bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;

            // Bindless operations returned above, so the sources always start at index 0 here.
            int srcIndex = 0;

            SpvInstruction Src(AggregateType type)
            {
                return context.Get(type, texOp.GetSource(srcIndex++));
            }

            if (isIndexed)
            {
                // The index source is still consumed so the following sources line up; its value is not used.
                Src(AggregateType.S32);
            }

            int pCount = texOp.Type.GetDimensions() + (isArray ? 1 : 0);

            SpvInstruction pCoords = BuildImageCoordinates(context, pCount, Src);

            pCoords = ScalingHelpers.ApplyScaling(context, texOp, pCoords, intCoords: true, isBindless, isIndexed, isArray, pCount);

            (var imageType, var imageVariable) = context.Images[new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format)];

            var image = context.Load(imageType, imageVariable);
            var imageComponentType = context.GetType(componentType.Convert());

            var texel = context.ImageRead(context.TypeVector(imageComponentType, 4), image, pCoords, ImageOperandsMask.MaskNone);
            var result = context.CompositeExtract(imageComponentType, texel, (SpvLiteralInteger)texOp.Index);

            return new OperationResult(componentType.Convert(), result);
        }

        /// <summary>
        /// Emits an image write of a 4-component texel at integer coordinates. Components are taken from the
        /// remaining sources in order; components with no source are filled with a typed zero.
        /// Bindless operations are not supported and emit nothing.
        /// </summary>
        private static OperationResult GenerateImageStore(CodeGenContext context, AstOperation operation)
        {
            AstTextureOperation texOp = (AstTextureOperation)operation;

            bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;

            // TODO: Bindless texture support. For now we just return 0/do nothing.
            if (isBindless)
            {
                return OperationResult.Invalid;
            }

            bool isArray = (texOp.Type & SamplerType.Array) != 0;
            bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;

            // Bindless operations returned above, so the sources always start at index 0 here.
            int srcIndex = 0;

            SpvInstruction Src(AggregateType type)
            {
                return context.Get(type, texOp.GetSource(srcIndex++));
            }

            if (isIndexed)
            {
                // The index source is still consumed so the following sources line up; its value is not used.
                Src(AggregateType.S32);
            }

            int pCount = texOp.Type.GetDimensions() + (isArray ? 1 : 0);

            SpvInstruction pCoords = BuildImageCoordinates(context, pCount, Src);

            var componentType = texOp.Format.GetComponentType();

            const int ComponentsCount = 4;

            SpvInstruction[] cElems = new SpvInstruction[ComponentsCount];

            for (int i = 0; i < ComponentsCount; i++)
            {
                if (srcIndex < texOp.SourcesCount)
                {
                    cElems[i] = Src(componentType.Convert());
                }
                else
                {
                    cElems[i] = ZeroOfComponentType(context, componentType);
                }
            }

            var texel = context.CompositeConstruct(context.TypeVector(context.GetType(componentType.Convert()), ComponentsCount), cElems);

            (var imageType, var imageVariable) = context.Images[new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format)];

            var image = context.Load(imageType, imageVariable);

            context.ImageWrite(image, pCoords, texel, ImageOperandsMask.MaskNone);

            return OperationResult.Invalid;
        }

        private static OperationResult GenerateIsNan(CodeGenContext context, AstOperation operation)
        {
            var source = operation.GetSource(0);

            SpvInstruction result;

            if (operation.Inst.HasFlag(Instruction.FP64))
            {
                result = context.IsNan(context.TypeBool(), context.GetFP64(source));
            }
            else
            {
                result = context.IsNan(context.TypeBool(),
context.GetFP32(source));
            }

            return new OperationResult(AggregateType.Bool, result);
        }

        /// <summary>
        /// Loads an input attribute as FP32. Source 0 must be a constant operand holding the base attribute,
        /// source 1 is the element offset (constant or dynamic) and source 2 is read as an S32 index.
        /// </summary>
        /// <exception cref="InvalidOperationException">Source 0 is not a constant operand.</exception>
        private static OperationResult GenerateLoadAttribute(CodeGenContext context, AstOperation operation)
        {
            var baseSource = operation.GetSource(0);
            var offsetSource = operation.GetSource(1);
            var indexSource = operation.GetSource(2);

            if (!(baseSource is AstOperand baseAttr) || baseAttr.Type != OperandType.Constant)
            {
                throw new InvalidOperationException($"First input of {nameof(Instruction.LoadAttribute)} must be a constant operand.");
            }

            var index = context.Get(AggregateType.S32, indexSource);
            AggregateType loadedType = AggregateType.FP32;

            SpvInstruction loaded;

            if (offsetSource is AstOperand constOffset && constOffset.Type == OperandType.Constant)
            {
                // Constant offset: fold it into the attribute offset (the element offset is scaled by 4).
                int attrOffset = baseAttr.Value + (constOffset.Value << 2);

                loaded = context.GetAttribute(loadedType, attrOffset, isOutAttr: false, index);
            }
            else
            {
                var dynamicAttr = context.Get(AggregateType.S32, offsetSource);

                loaded = context.GetAttribute(loadedType, dynamicAttr, isOutAttr: false, index);
            }

            return new OperationResult(loadedType, loaded);
        }

        /// <summary>
        /// Loads one FP32 value from a uniform buffer. Source 0 selects the buffer and source 1 is an S32
        /// offset that is split into <c>offset &gt;&gt; 2</c> and <c>offset &amp; 3</c> for the access chain.
        /// </summary>
        private static OperationResult GenerateLoadConstant(CodeGenContext context, AstOperation operation)
        {
            var bufferSource = operation.GetSource(0);
            var offset = context.Get(AggregateType.S32, operation.GetSource(1));

            var fieldIndex = context.Constant(context.TypeS32(), 0);
            var vectorIndex = context.ShiftRightArithmetic(context.TypeS32(), offset, context.Constant(context.TypeS32(), 2));
            var componentIndex = context.BitwiseAnd(context.TypeS32(), offset, context.Constant(context.TypeS32(), 3));

            // Builds the pointer to one component, going through the buffer array when there is one and
            // through the buffer picked by the constant slot otherwise.
            SpvInstruction PointerTo(SpvInstruction lastIndex)
            {
                if (context.UniformBuffersArray != null)
                {
                    var bufferArray = context.UniformBuffersArray;
                    var slot = context.Get(AggregateType.S32, bufferSource);

                    return context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeFP32()), bufferArray, slot, fieldIndex, vectorIndex, lastIndex);
                }

                var buffer = context.UniformBuffers[((AstOperand)bufferSource).Value];

                return context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeFP32()), buffer, fieldIndex, vectorIndex, lastIndex);
            }

            if (!context.Config.GpuAccessor.QueryHostHasVectorIndexingBug())
            {
                var direct = context.Load(context.TypeFP32(), PointerTo(componentIndex));

                return new OperationResult(AggregateType.FP32, direct);
            }

            SpvInstruction selected = null;

            // Test for each component individually.
            for (int i = 0; i < 4; i++)
            {
                var component = context.Constant(context.TypeS32(), i);

                SpvInstruction candidate = context.Load(context.TypeFP32(), PointerTo(component));

                if (selected != null)
                {
                    selected = context.Select(context.TypeFP32(), context.IEqual(context.TypeBool(), componentIndex, component), candidate, selected);
                }
                else
                {
                    selected = candidate;
                }
            }

            return new OperationResult(AggregateType.FP32, selected);
        }

        // Local memory loads use the Private storage class and the context's LocalMemory variable.
        private static OperationResult GenerateLoadLocal(CodeGenContext context, AstOperation operation)
            => GenerateLoadLocalOrShared(context, operation, StorageClass.Private, context.LocalMemory);

        // Shared memory loads use the Workgroup storage class and the context's SharedMemory variable.
        private static OperationResult GenerateLoadShared(CodeGenContext context, AstOperation operation)
            => GenerateLoadLocalOrShared(context, operation, StorageClass.Workgroup, context.SharedMemory);

        private static OperationResult GenerateLoadLocalOrShared(
CodeGenContext context,
            AstOperation operation,
            StorageClass storageClass,
            SpvInstruction memory)
        {
            // Source 0 is the S32 element offset into the memory array; elements are loaded as U32.
            var elementOffset = context.Get(AggregateType.S32, operation.GetSource(0));

            var pointerType = context.TypePointer(storageClass, context.TypeU32());
            var pointer = context.AccessChain(pointerType, memory, elementOffset);
            var loaded = context.Load(context.TypeU32(), pointer);

            return new OperationResult(AggregateType.U32, loaded);
        }

        // Loads a U32 from the pointer computed by GetStorageElemPointer for this operation.
        private static OperationResult GenerateLoadStorage(CodeGenContext context, AstOperation operation)
        {
            var pointer = GetStorageElemPointer(context, operation);
            var loaded = context.Load(context.TypeU32(), pointer);

            return new OperationResult(AggregateType.U32, loaded);
        }

        /// <summary>
        /// Emits an image LOD query for FP32 coordinates and returns the component of the 2-component
        /// result selected by the operation's <c>Index</c>.
        /// </summary>
        private static OperationResult GenerateLod(CodeGenContext context, AstOperation operation)
        {
            AstTextureOperation texOp = (AstTextureOperation)operation;

            bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;

            bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;

            // TODO: Bindless texture support. For now we just return 0.
            // NOTE(review): this placeholder is typed S32 while the normal path below returns FP32 - confirm intended.
            if (isBindless)
            {
                return new OperationResult(AggregateType.S32, context.Constant(context.TypeS32(), 0));
            }

            int nextSource = 0;

            SpvInstruction ReadSource(AggregateType type)
            {
                return context.Get(type, texOp.GetSource(nextSource++));
            }

            if (isIndexed)
            {
                // The index source is still consumed so the coordinates line up; its value is not used.
                ReadSource(AggregateType.S32);
            }

            int dimensions = texOp.Type.GetDimensions();

            SpvInstruction coords;

            if (dimensions <= 1)
            {
                coords = ReadSource(AggregateType.FP32);
            }
            else
            {
                SpvInstruction[] components = new SpvInstruction[dimensions];

                for (int c = 0; c < dimensions; c++)
                {
                    components[c] = ReadSource(AggregateType.FP32);
                }

                var coordsType = context.TypeVector(context.TypeFP32(), dimensions);
                coords = context.CompositeConstruct(coordsType, components);
            }

            var meta = new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format);

            (_, var sampledImageType, var sampledImageVariable) = context.Samplers[meta];

            var sampledImage = context.Load(sampledImageType, sampledImageVariable);

            var lodPairType = context.TypeVector(context.TypeFP32(), 2);
            var lodPair = context.ImageQueryLod(lodPairType, sampledImage, coords);
            var lod = context.CompositeExtract(context.TypeFP32(), lodPair, (SpvLiteralInteger)texOp.Index);

            return new OperationResult(AggregateType.FP32, lod);
        }

        // Forwards to GenerateUnary with the GlslLog2 emitter; no second emitter is supplied (null).
        private static OperationResult GenerateLogarithmB2(CodeGenContext context, AstOperation operation)
            => GenerateUnary(context, operation, context.Delegates.GlslLog2, null);

        // Boolean AND forwards to GenerateBinaryBool with the LogicalAnd emitter.
        private static OperationResult GenerateLogicalAnd(CodeGenContext context, AstOperation operation)
            => GenerateBinaryBool(context, operation, context.Delegates.LogicalAnd);

        // Boolean XOR is emitted through the LogicalNotEqual emitter.
        private static OperationResult GenerateLogicalExclusiveOr(CodeGenContext context, AstOperation operation)
            => GenerateBinaryBool(context, operation, context.Delegates.LogicalNotEqual);

        private static OperationResult GenerateLogicalNot(CodeGenContext context, AstOperation operation)
        {
            return
GenerateUnaryBool(context, operation, context.Delegates.LogicalNot);
        }

        // Boolean OR forwards to GenerateBinaryBool with the LogicalOr emitter.
        private static OperationResult GenerateLogicalOr(CodeGenContext context, AstOperation operation)
        {
            return GenerateBinaryBool(context, operation, context.Delegates.LogicalOr);
        }

        /// <summary>
        /// Walks up from the context's current block until a do-while block is found.
        /// </summary>
        /// <param name="context">Code generation context whose current block is the starting point.</param>
        /// <param name="what">Name of the requesting operation, used only in the error message.</param>
        /// <returns>The nearest enclosing do-while block (the current block itself if it is one).</returns>
        /// <exception cref="InvalidOperationException">No enclosing do-while block exists.</exception>
        private static AstBlock FindEnclosingLoop(CodeGenContext context, string what)
        {
            AstBlock block = context.CurrentBlock;

            // Stop on a missing parent instead of dereferencing it.
            // NOTE(review): assumes the outermost block's Parent is null - confirm.
            while (block != null && block.Type != AstBlockType.DoWhile)
            {
                block = block.Parent;
            }

            if (block == null)
            {
                throw new InvalidOperationException($"{what} is not inside a loop block.");
            }

            return block;
        }

        /// <summary>
        /// Emits a branch to the label that follows the parent of the enclosing do-while block.
        /// </summary>
        /// <exception cref="InvalidOperationException">The current block is not inside a do-while block.</exception>
        private static OperationResult GenerateLoopBreak(CodeGenContext context, AstOperation operation)
        {
            AstBlock loopBlock = FindEnclosingLoop(context, "Loop break");

            context.Branch(context.GetNextLabel(loopBlock.Parent));

            return OperationResult.Invalid;
        }

        /// <summary>
        /// Emits a branch to the continue target registered for the enclosing do-while block.
        /// </summary>
        /// <exception cref="InvalidOperationException">The current block is not inside a do-while block.</exception>
        private static OperationResult GenerateLoopContinue(CodeGenContext context, AstOperation operation)
        {
            AstBlock loopBlock = FindEnclosingLoop(context, "Loop continue");

            // Only the continue target is needed; the first tuple element is discarded.
            (_, var continueTarget) = context.LoopTargets[loopBlock];

            context.Branch(continueTarget);

            return OperationResult.Invalid;
        }

        // Forwards to GenerateBinary with the GlslFMax and GlslSMax emitters.
        private static OperationResult GenerateMaximum(CodeGenContext context, AstOperation operation)
        {
            return GenerateBinary(context, operation, context.Delegates.GlslFMax, context.Delegates.GlslSMax);
        }

        // Forwards to GenerateBinaryU32 with the GlslUMax emitter.
        private static OperationResult GenerateMaximumU32(CodeGenContext context, AstOperation operation)
        {
            return GenerateBinaryU32(context, operation, context.Delegates.GlslUMax);
        }

        /// <summary>
        /// Emits a memory barrier with the constant operands 1 and 3400.
        /// </summary>
        private static OperationResult GenerateMemoryBarrier(CodeGenContext context, AstOperation operation)
        {
            // NOTE(review): assuming the operands are (scope, semantics) as in SPIR-V OpMemoryBarrier,
            // 1 is Scope.Device and 3400 (0xD48) is AcquireRelease | UniformMemory | WorkgroupMemory |
            // AtomicCounterMemory | ImageMemory. Confirm against the MemoryBarrier emitter signature.
            context.MemoryBarrier(context.Constant(context.TypeU32(), 1), context.Constant(context.TypeU32(), 3400));
            return OperationResult.Invalid;
        }

        // Forwards to GenerateBinary with the GlslFMin and GlslSMin emitters.
        private static OperationResult GenerateMinimum(CodeGenContext context, AstOperation operation)
        {
            return GenerateBinary(context, operation, context.Delegates.GlslFMin, context.Delegates.GlslSMin);
        }

        private static OperationResult GenerateMinimumU32(CodeGenContext context, AstOperation operation)
        {
            return GenerateBinaryU32(context,
operation, context.Delegates.GlslUMin);
        }

        // Forwards to GenerateBinary with the FMul and IMul emitters.
        private static OperationResult GenerateMultiply(CodeGenContext context, AstOperation operation)
            => GenerateBinary(context, operation, context.Delegates.FMul, context.Delegates.IMul);

        /// <summary>
        /// Emits a signed extended multiply of sources 0 and 1 and returns member 1 of the
        /// two-member result struct as S32.
        /// </summary>
        private static OperationResult GenerateMultiplyHighS32(CodeGenContext context, AstOperation operation)
        {
            var lhs = operation.GetSource(0);
            var rhs = operation.GetSource(1);

            var pairType = context.TypeStruct(false, context.TypeS32(), context.TypeS32());
            var product = context.SMulExtended(pairType, context.GetS32(lhs), context.GetS32(rhs));
            var member1 = context.CompositeExtract(context.TypeS32(), product, 1);

            return new OperationResult(AggregateType.S32, member1);
        }

        /// <summary>
        /// Emits an unsigned extended multiply of sources 0 and 1 and returns member 1 of the
        /// two-member result struct as U32.
        /// </summary>
        private static OperationResult GenerateMultiplyHighU32(CodeGenContext context, AstOperation operation)
        {
            var lhs = operation.GetSource(0);
            var rhs = operation.GetSource(1);

            var pairType = context.TypeStruct(false, context.TypeU32(), context.TypeU32());
            var product = context.UMulExtended(pairType, context.GetU32(lhs), context.GetU32(rhs));
            var member1 = context.CompositeExtract(context.TypeU32(), product, 1);

            return new OperationResult(AggregateType.U32, member1);
        }

        // Forwards to GenerateUnary with the FNegate and SNegate emitters.
        private static OperationResult GenerateNegate(CodeGenContext context, AstOperation operation)
            => GenerateUnary(context, operation, context.Delegates.FNegate, context.Delegates.SNegate);

        /// <summary>
        /// Builds a 2-component U32 vector from sources 0 and 1 and packs it into an FP64 value
        /// with GlslPackDouble2x32.
        /// </summary>
        private static OperationResult GeneratePackDouble2x32(CodeGenContext context, AstOperation operation)
        {
            var first = context.GetU32(operation.GetSource(0));
            var second = context.GetU32(operation.GetSource(1));

            var pairType = context.TypeVector(context.TypeU32(), 2);
            var pair = context.CompositeConstruct(pairType, first, second);
            var packed = context.GlslPackDouble2x32(context.TypeFP64(), pair);

            return new OperationResult(AggregateType.FP64, packed);
        }

        private static OperationResult GeneratePackHalf2x16(CodeGenContext context, AstOperation operation)
{
            // Builds a 2-component FP32 vector from sources 0 and 1 and packs it into a U32
            // with GlslPackHalf2x16.
            var first = context.GetFP32(operation.GetSource(0));
            var second = context.GetFP32(operation.GetSource(1));

            var pairType = context.TypeVector(context.TypeFP32(), 2);
            var pair = context.CompositeConstruct(pairType, first, second);
            var packed = context.GlslPackHalf2x16(context.TypeU32(), pair);

            return new OperationResult(AggregateType.U32, packed);
        }

        // Forwards to GenerateUnary with the GlslInverseSqrt emitter; no second emitter is supplied (null).
        private static OperationResult GenerateReciprocalSquareRoot(CodeGenContext context, AstOperation operation)
            => GenerateUnary(context, operation, context.Delegates.GlslInverseSqrt, null);

        // Emits a return; the operation itself produces no value.
        private static OperationResult GenerateReturn(CodeGenContext context, AstOperation operation)
        {
            context.Return();

            return OperationResult.Invalid;
        }

        // Rounding is emitted through the GlslRoundEven emitter.
        private static OperationResult GenerateRound(CodeGenContext context, AstOperation operation)
            => GenerateUnary(context, operation, context.Delegates.GlslRoundEven, null);

        // All three shifts go through GenerateBinaryS32; only the shift emitter differs.
        private static OperationResult GenerateShiftLeft(CodeGenContext context, AstOperation operation)
            => GenerateBinaryS32(context, operation, context.Delegates.ShiftLeftLogical);

        private static OperationResult GenerateShiftRightS32(CodeGenContext context, AstOperation operation)
            => GenerateBinaryS32(context, operation, context.Delegates.ShiftRightArithmetic);

        private static OperationResult GenerateShiftRightU32(CodeGenContext context, AstOperation operation)
            => GenerateBinaryS32(context, operation, context.Delegates.ShiftRightLogical);

        /// <summary>
        /// Subgroup shuffle: reads the FP32 value of source 0 from the lane
        /// <c>(index &amp; ~segMask) | (laneId &amp; segMask)</c>, where <c>segMask = (mask &gt;&gt; 8) &amp; 31</c>.
        /// The read is used only when that lane does not exceed <c>(laneId &amp; segMask) | (mask &amp; 31 &amp; ~segMask)</c>;
        /// otherwise the lane's own value is returned. The validity flag is stored into the local given by source 3.
        /// </summary>
        private static OperationResult GenerateShuffle(CodeGenContext context, AstOperation operation)
        {
            var input = context.GetFP32(operation.GetSource(0));
            var laneIndex = context.GetU32(operation.GetSource(1));
            var packedMask = context.GetU32(operation.GetSource(2));

            var u31 = context.Constant(context.TypeU32(), 31);
            var u8 = context.Constant(context.TypeU32(), 8);

            var clampBits = context.BitwiseAnd(context.TypeU32(), packedMask, u31);
            var shiftedMask = context.ShiftRightLogical(context.TypeU32(), packedMask, u8);
            var segmentMask = context.BitwiseAnd(context.TypeU32(), shiftedMask, u31);
            var invSegmentMask = context.Not(context.TypeU32(), segmentMask);
            var clampOutsideSegment = context.BitwiseAnd(context.TypeU32(), clampBits, invSegmentMask);
            var indexOutsideSegment = context.BitwiseAnd(context.TypeU32(), laneIndex, invSegmentMask);

            var lane = context.GetAttribute(AggregateType.U32, AttributeConsts.LaneId, false);

            var firstLane = context.BitwiseAnd(context.TypeU32(), lane, segmentMask);
            var lastLane = context.BitwiseOr(context.TypeU32(), firstLane, clampOutsideSegment);
            var sourceLane = context.BitwiseOr(context.TypeU32(), indexOutsideSegment, firstLane);
            var inRange = context.ULessThanEqual(context.TypeBool(), sourceLane, lastLane);
            var fetched = context.SubgroupReadInvocationKHR(context.TypeFP32(), input, sourceLane);
            var selected = context.Select(context.TypeFP32(), inRange, fetched, input);

            var validOperand = (AstOperand)operation.GetSource(3);
            var validPointer = context.GetLocalPointer(validOperand);
            var validValue = context.BitcastIfNeeded(validOperand.VarType.Convert(), AggregateType.Bool, inRange);

            context.Store(validPointer, validValue);

            return new OperationResult(AggregateType.FP32, selected);
        }

        /// <summary>
        /// Subgroup shuffle-down: like <c>GenerateShuffle</c>, but the source lane is <c>laneId + index</c>.
        /// </summary>
        private static OperationResult GenerateShuffleDown(CodeGenContext context, AstOperation operation)
        {
            var input = context.GetFP32(operation.GetSource(0));
            var laneDelta = context.GetU32(operation.GetSource(1));
            var packedMask = context.GetU32(operation.GetSource(2));

            var u31 = context.Constant(context.TypeU32(), 31);
            var u8 = context.Constant(context.TypeU32(), 8);

            var clampBits = context.BitwiseAnd(context.TypeU32(), packedMask, u31);
            var shiftedMask = context.ShiftRightLogical(context.TypeU32(), packedMask, u8);
            var segmentMask = context.BitwiseAnd(context.TypeU32(), shiftedMask, u31);
            var invSegmentMask = context.Not(context.TypeU32(), segmentMask);
            var clampOutsideSegment = context.BitwiseAnd(context.TypeU32(), clampBits, invSegmentMask);

            var lane = context.GetAttribute(AggregateType.U32, AttributeConsts.LaneId, false);

            var firstLane = context.BitwiseAnd(context.TypeU32(), lane, segmentMask);
            var lastLane = context.BitwiseOr(context.TypeU32(), firstLane, clampOutsideSegment);
            var sourceLane = context.IAdd(context.TypeU32(), lane, laneDelta);
            var inRange = context.ULessThanEqual(context.TypeBool(), sourceLane, lastLane);
            var fetched = context.SubgroupReadInvocationKHR(context.TypeFP32(), input, sourceLane);
            var selected = context.Select(context.TypeFP32(), inRange, fetched, input);

            var validOperand = (AstOperand)operation.GetSource(3);
            var validPointer = context.GetLocalPointer(validOperand);
            var validValue = context.BitcastIfNeeded(validOperand.VarType.Convert(), AggregateType.Bool, inRange);

            context.Store(validPointer, validValue);

            return new OperationResult(AggregateType.FP32, selected);
        }

        /// <summary>
        /// Subgroup shuffle-up: the source lane is <c>laneId - index</c>, and the read is used only when a
        /// signed comparison finds it greater than or equal to <c>laneId &amp; segMask</c>.
        /// </summary>
        private static OperationResult GenerateShuffleUp(CodeGenContext context, AstOperation operation)
        {
            var input = context.GetFP32(operation.GetSource(0));
            var laneDelta = context.GetU32(operation.GetSource(1));
            var packedMask = context.GetU32(operation.GetSource(2));

            var u31 = context.Constant(context.TypeU32(), 31);
            var u8 = context.Constant(context.TypeU32(), 8);

            var shiftedMask = context.ShiftRightLogical(context.TypeU32(), packedMask, u8);
            var segmentMask = context.BitwiseAnd(context.TypeU32(), shiftedMask, u31);

            var lane = context.GetAttribute(AggregateType.U32, AttributeConsts.LaneId, false);

            var firstLane = context.BitwiseAnd(context.TypeU32(), lane, segmentMask);
            var sourceLane = context.ISub(context.TypeU32(), lane, laneDelta);
            var inRange = context.SGreaterThanEqual(context.TypeBool(), sourceLane, firstLane);
            var fetched = context.SubgroupReadInvocationKHR(context.TypeFP32(), input, sourceLane);
            var selected = context.Select(context.TypeFP32(), inRange, fetched, input);

            var validOperand = (AstOperand)operation.GetSource(3);
            var validPointer = context.GetLocalPointer(validOperand);
            var validValue = context.BitcastIfNeeded(validOperand.VarType.Convert(), AggregateType.Bool, inRange);

            context.Store(validPointer, validValue);

            return new OperationResult(AggregateType.FP32, selected);
        }

        private static OperationResult
GenerateShuffleXor(CodeGenContext context, AstOperation operation)
        {
            // Subgroup shuffle-xor: the source lane is laneId ^ index; the read is used only when that lane
            // does not exceed (laneId & segMask) | (mask & 31 & ~segMask), with segMask = (mask >> 8) & 31.
            var input = context.GetFP32(operation.GetSource(0));
            var laneBits = context.GetU32(operation.GetSource(1));
            var packedMask = context.GetU32(operation.GetSource(2));

            var u31 = context.Constant(context.TypeU32(), 31);
            var u8 = context.Constant(context.TypeU32(), 8);

            var clampBits = context.BitwiseAnd(context.TypeU32(), packedMask, u31);
            var shiftedMask = context.ShiftRightLogical(context.TypeU32(), packedMask, u8);
            var segmentMask = context.BitwiseAnd(context.TypeU32(), shiftedMask, u31);
            var invSegmentMask = context.Not(context.TypeU32(), segmentMask);
            var clampOutsideSegment = context.BitwiseAnd(context.TypeU32(), clampBits, invSegmentMask);

            var lane = context.GetAttribute(AggregateType.U32, AttributeConsts.LaneId, false);

            var firstLane = context.BitwiseAnd(context.TypeU32(), lane, segmentMask);
            var lastLane = context.BitwiseOr(context.TypeU32(), firstLane, clampOutsideSegment);
            var sourceLane = context.BitwiseXor(context.TypeU32(), lane, laneBits);
            var inRange = context.ULessThanEqual(context.TypeBool(), sourceLane, lastLane);
            var fetched = context.SubgroupReadInvocationKHR(context.TypeFP32(), input, sourceLane);
            var selected = context.Select(context.TypeFP32(), inRange, fetched, input);

            // The validity flag is written to the local given by source 3.
            var validOperand = (AstOperand)operation.GetSource(3);
            var validPointer = context.GetLocalPointer(validOperand);
            var validValue = context.BitcastIfNeeded(validOperand.VarType.Convert(), AggregateType.Bool, inRange);

            context.Store(validPointer, validValue);

            return new OperationResult(AggregateType.FP32, selected);
        }

        // Forwards to GenerateUnary with the GlslSin emitter; no second emitter is supplied (null).
        private static OperationResult GenerateSine(CodeGenContext context, AstOperation operation)
            => GenerateUnary(context, operation, context.Delegates.GlslSin, null);

        // Forwards to GenerateUnary with the GlslSqrt emitter; no second emitter is supplied (null).
        private static OperationResult GenerateSquareRoot(CodeGenContext context, AstOperation operation)
            => GenerateUnary(context, operation, context.Delegates.GlslSqrt, null);

        /// <summary>
        /// Stores source 2 into an output attribute. Source 0 must be a constant operand holding the base
        /// attribute and source 1 is the element offset (constant or dynamic). The stored value is read
        /// with the element type reported by <c>GetAttributeElemPointer</c>.
        /// </summary>
        /// <exception cref="InvalidOperationException">Source 0 is not a constant operand.</exception>
        private static OperationResult GenerateStoreAttribute(CodeGenContext context, AstOperation operation)
        {
            var baseSource = operation.GetSource(0);
            var offsetSource = operation.GetSource(1);
            var valueSource = operation.GetSource(2);

            if (!(baseSource is AstOperand baseAttr) || baseAttr.Type != OperandType.Constant)
            {
                throw new InvalidOperationException($"First input of {nameof(Instruction.StoreAttribute)} must be a constant operand.");
            }

            SpvInstruction target;
            AggregateType targetType;

            if (offsetSource is AstOperand constOffset && constOffset.Type == OperandType.Constant)
            {
                // Constant offset: fold it into the attribute offset (the element offset is scaled by 4).
                int attrOffset = baseAttr.Value + (constOffset.Value << 2);

                target = context.GetAttributeElemPointer(attrOffset, isOutAttr: true, index: null, out targetType);
            }
            else
            {
                var dynamicAttr = context.Get(AggregateType.S32, offsetSource);

                target = context.GetAttributeElemPointer(dynamicAttr, isOutAttr: true, index: null, out targetType);
            }

            var stored = context.Get(targetType, valueSource);

            context.Store(target, stored);

            return OperationResult.Invalid;
        }

        // Local memory stores use the Private storage class and the context's LocalMemory variable.
        private static OperationResult GenerateStoreLocal(CodeGenContext context, AstOperation operation)
            => GenerateStoreLocalOrShared(context, operation, StorageClass.Private, context.LocalMemory);

        // Shared memory stores use the Workgroup storage class and the context's SharedMemory variable.
        private static OperationResult GenerateStoreShared(CodeGenContext context, AstOperation operation)
            => GenerateStoreLocalOrShared(context, operation, StorageClass.Workgroup, context.SharedMemory);

        /// <summary>
        /// Stores source 1 (read as U32) into <paramref name="memory"/> at the S32 element offset given
        /// by source 0, using a pointer in <paramref name="storageClass"/>.
        /// </summary>
        private static OperationResult GenerateStoreLocalOrShared(
            CodeGenContext context,
            AstOperation operation,
            StorageClass storageClass,
            SpvInstruction memory)
        {
            var elementOffset = context.Get(AggregateType.S32, operation.GetSource(0));
            var stored = context.Get(AggregateType.U32, operation.GetSource(1));

            var pointerType = context.TypePointer(storageClass, context.TypeU32());
            var pointer = context.AccessChain(pointerType, memory, elementOffset);

            context.Store(pointer, stored);

            return OperationResult.Invalid;
        }

        private static OperationResult GenerateStoreShared16(CodeGenContext context, AstOperation operation)
        {
            GenerateStoreSharedSmallInt(context, operation, 16);
return OperationResult.Invalid; + } + + private static OperationResult GenerateStoreShared8(CodeGenContext context, AstOperation operation) + { + GenerateStoreSharedSmallInt(context, operation, 8); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateStoreStorage(CodeGenContext context, AstOperation operation) + { + var elemPointer = GetStorageElemPointer(context, operation); + context.Store(elemPointer, context.Get(AggregateType.U32, operation.GetSource(2))); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateStoreStorage16(CodeGenContext context, AstOperation operation) + { + GenerateStoreStorageSmallInt(context, operation, 16); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateStoreStorage8(CodeGenContext context, AstOperation operation) + { + GenerateStoreStorageSmallInt(context, operation, 8); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateSubtract(CodeGenContext context, AstOperation operation) + { + return GenerateBinary(context, operation, context.Delegates.FSub, context.Delegates.ISub); + } + + private static OperationResult GenerateSwizzleAdd(CodeGenContext context, AstOperation operation) + { + var x = context.Get(AggregateType.FP32, operation.GetSource(0)); + var y = context.Get(AggregateType.FP32, operation.GetSource(1)); + var mask = context.Get(AggregateType.U32, operation.GetSource(2)); + + var v4float = context.TypeVector(context.TypeFP32(), 4); + var one = context.Constant(context.TypeFP32(), 1.0f); + var minusOne = context.Constant(context.TypeFP32(), -1.0f); + var zero = context.Constant(context.TypeFP32(), 0.0f); + var xLut = context.ConstantComposite(v4float, one, minusOne, one, zero); + var yLut = context.ConstantComposite(v4float, one, one, minusOne, one); + + var threadId = context.GetAttribute(AggregateType.U32, AttributeConsts.LaneId, false); + var shift = context.BitwiseAnd(context.TypeU32(), threadId, 
context.Constant(context.TypeU32(), 3)); + shift = context.ShiftLeftLogical(context.TypeU32(), shift, context.Constant(context.TypeU32(), 1)); + var lutIdx = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, shift), context.Constant(context.TypeU32(), 3)); // Keep only this lane's 2 selector bits so the dynamic extract stays inside the 4-entry LUTs. + + var xLutValue = context.VectorExtractDynamic(context.TypeFP32(), xLut, lutIdx); + var yLutValue = context.VectorExtractDynamic(context.TypeFP32(), yLut, lutIdx); + + var xResult = context.FMul(context.TypeFP32(), x, xLutValue); + var yResult = context.FMul(context.TypeFP32(), y, yLutValue); + var result = context.FAdd(context.TypeFP32(), xResult, yResult); + + return new OperationResult(AggregateType.FP32, result); + } + + private static OperationResult GenerateTextureSample(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + bool isGather = (texOp.Flags & TextureFlags.Gather) != 0; + bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0; + bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0; + bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0; + bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0; + bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0; + bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0; + + bool isArray = (texOp.Type & SamplerType.Array) != 0; + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0; + bool isShadow = (texOp.Type & SamplerType.Shadow) != 0; + + // TODO: Bindless texture support. For now we just return 0. + if (isBindless) + { + return new OperationResult(AggregateType.FP32, context.Constant(context.TypeFP32(), 0f)); + } + + // This combination is valid, but not available on GLSL. + // For now, ignore the LOD level and do a normal sample. + // TODO: How to implement it properly? 
+ if (hasLodLevel && isArray && isShadow) + { + hasLodLevel = false; + } + + int srcIndex = isBindless ? 1 : 0; + + SpvInstruction Src(AggregateType type) + { + return context.Get(type, texOp.GetSource(srcIndex++)); + } + + SpvInstruction index = null; + + if (isIndexed) + { + index = Src(AggregateType.S32); + } + + int coordsCount = texOp.Type.GetDimensions(); + + int pCount = coordsCount; + + int arrayIndexElem = -1; + + if (isArray) + { + arrayIndexElem = pCount++; + } + + AggregateType coordType = intCoords ? AggregateType.S32 : AggregateType.FP32; + + SpvInstruction AssemblePVector(int count) + { + if (count > 1) + { + SpvInstruction[] elems = new SpvInstruction[count]; + + for (int index = 0; index < count; index++) + { + if (arrayIndexElem == index) + { + elems[index] = Src(AggregateType.S32); + + if (!intCoords) + { + elems[index] = context.ConvertSToF(context.TypeFP32(), elems[index]); + } + } + else + { + elems[index] = Src(coordType); + } + } + + var vectorType = context.TypeVector(intCoords ? 
context.TypeS32() : context.TypeFP32(), count); + return context.CompositeConstruct(vectorType, elems); + } + else + { + return Src(coordType); + } + } + + SpvInstruction pCoords = AssemblePVector(pCount); + pCoords = ScalingHelpers.ApplyScaling(context, texOp, pCoords, intCoords, isBindless, isIndexed, isArray, pCount); + + SpvInstruction AssembleDerivativesVector(int count) + { + if (count > 1) + { + SpvInstruction[] elems = new SpvInstruction[count]; + + for (int index = 0; index < count; index++) + { + elems[index] = Src(AggregateType.FP32); + } + + var vectorType = context.TypeVector(context.TypeFP32(), count); + return context.CompositeConstruct(vectorType, elems); + } + else + { + return Src(AggregateType.FP32); + } + } + + SpvInstruction dRef = null; + + if (isShadow) + { + dRef = Src(AggregateType.FP32); + } + + SpvInstruction[] derivatives = null; + + if (hasDerivatives) + { + derivatives = new[] + { + AssembleDerivativesVector(coordsCount), // dPdx + AssembleDerivativesVector(coordsCount) // dPdy + }; + } + + SpvInstruction sample = null; + SpvInstruction lod = null; + + if (isMultisample) + { + sample = Src(AggregateType.S32); + } + else if (hasLodLevel) + { + lod = Src(coordType); + } + + SpvInstruction AssembleOffsetVector(int count) + { + if (count > 1) + { + SpvInstruction[] elems = new SpvInstruction[count]; + + for (int index = 0; index < count; index++) + { + elems[index] = Src(AggregateType.S32); + } + + var vectorType = context.TypeVector(context.TypeS32(), count); + + return context.ConstantComposite(vectorType, elems); + } + else + { + return Src(AggregateType.S32); + } + } + + SpvInstruction[] offsets = null; + + if (hasOffset) + { + offsets = new[] { AssembleOffsetVector(coordsCount) }; + } + else if (hasOffsets) + { + offsets = new[] + { + AssembleOffsetVector(coordsCount), + AssembleOffsetVector(coordsCount), + AssembleOffsetVector(coordsCount), + AssembleOffsetVector(coordsCount) + }; + } + + SpvInstruction lodBias = null; + + if 
(hasLodBias) + { + lodBias = Src(AggregateType.FP32); + } + + SpvInstruction compIdx = null; + + // textureGather* optional extra component index, + // not needed for shadow samplers. + if (isGather && !isShadow) + { + compIdx = Src(AggregateType.S32); + } + + var operandsList = new List<SpvInstruction>(); + var operandsMask = ImageOperandsMask.MaskNone; + + if (hasLodBias) + { + operandsMask |= ImageOperandsMask.Bias; + operandsList.Add(lodBias); + } + + if (!isMultisample && hasLodLevel) + { + operandsMask |= ImageOperandsMask.Lod; + operandsList.Add(lod); + } + + if (hasDerivatives) + { + operandsMask |= ImageOperandsMask.Grad; + operandsList.Add(derivatives[0]); + operandsList.Add(derivatives[1]); + } + + if (hasOffset) + { + operandsMask |= ImageOperandsMask.ConstOffset; + operandsList.Add(offsets[0]); + } + else if (hasOffsets) + { + operandsMask |= ImageOperandsMask.ConstOffsets; + SpvInstruction arrayv2 = context.TypeArray(context.TypeVector(context.TypeS32(), 2), context.Constant(context.TypeU32(), 4)); + operandsList.Add(context.ConstantComposite(arrayv2, offsets[0], offsets[1], offsets[2], offsets[3])); // ConstOffsets must reference a constant instruction, not OpCompositeConstruct. + } + + if (isMultisample) + { + operandsMask |= ImageOperandsMask.Sample; + operandsList.Add(sample); + } + + bool colorIsVector = isGather || !isShadow; + var resultType = colorIsVector ? 
context.TypeVector(context.TypeFP32(), 4) : context.TypeFP32(); + + var meta = new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format); + + (var imageType, var sampledImageType, var sampledImageVariable) = context.Samplers[meta]; + + var image = context.Load(sampledImageType, sampledImageVariable); + + if (intCoords) + { + image = context.Image(imageType, image); + } + + var operands = operandsList.ToArray(); + + SpvInstruction result; + + if (intCoords) + { + result = context.ImageFetch(resultType, image, pCoords, operandsMask, operands); + } + else if (isGather) + { + if (isShadow) + { + result = context.ImageDrefGather(resultType, image, pCoords, dRef, operandsMask, operands); + } + else + { + result = context.ImageGather(resultType, image, pCoords, compIdx, operandsMask, operands); + } + } + else if (isShadow) + { + if (hasLodLevel) + { + result = context.ImageSampleDrefExplicitLod(resultType, image, pCoords, dRef, operandsMask, operands); + } + else + { + result = context.ImageSampleDrefImplicitLod(resultType, image, pCoords, dRef, operandsMask, operands); + } + } + else if (hasDerivatives || hasLodLevel) + { + result = context.ImageSampleExplicitLod(resultType, image, pCoords, operandsMask, operands); + } + else + { + result = context.ImageSampleImplicitLod(resultType, image, pCoords, operandsMask, operands); + } + + if (colorIsVector) + { + result = context.CompositeExtract(context.TypeFP32(), result, (SpvLiteralInteger)texOp.Index); + } + + return new OperationResult(AggregateType.FP32, result); + } + + private static OperationResult GenerateTextureSize(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + + // TODO: Bindless texture support. For now we just return 0. 
+ if (isBindless) + { + return new OperationResult(AggregateType.S32, context.Constant(context.TypeS32(), 0)); + } + + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + + SpvInstruction index = null; + + if (isIndexed) + { + index = context.GetS32(texOp.GetSource(0)); + } + + var meta = new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format); + + (var imageType, var sampledImageType, var sampledImageVariable) = context.Samplers[meta]; + + var image = context.Load(sampledImageType, sampledImageVariable); + image = context.Image(imageType, image); + + if (texOp.Index == 3) + { + return new OperationResult(AggregateType.S32, context.ImageQueryLevels(context.TypeS32(), image)); + } + else + { + var type = context.SamplersTypes[meta]; + bool hasLod = !type.HasFlag(SamplerType.Multisample) && type != SamplerType.TextureBuffer; + + int dimensions = (type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : type.GetDimensions(); + + if (type.HasFlag(SamplerType.Array)) + { + dimensions++; + } + + var resultType = dimensions == 1 ? context.TypeS32() : context.TypeVector(context.TypeS32(), dimensions); + + SpvInstruction result; + + if (hasLod) + { + int lodSrcIndex = isBindless || isIndexed ? 
1 : 0; + var lod = context.GetS32(operation.GetSource(lodSrcIndex)); + result = context.ImageQuerySizeLod(resultType, image, lod); + } + else + { + result = context.ImageQuerySize(resultType, image); + } + + if (dimensions != 1) + { + result = context.CompositeExtract(context.TypeS32(), result, (SpvLiteralInteger)texOp.Index); + } + + if (texOp.Index < 2 || (type & SamplerType.Mask) == SamplerType.Texture3D) + { + result = ScalingHelpers.ApplyUnscaling(context, texOp, result, isBindless, isIndexed); + } + + return new OperationResult(AggregateType.S32, result); + } + } + + private static OperationResult GenerateTruncate(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslTrunc, null); + } + + private static OperationResult GenerateUnpackDouble2x32(CodeGenContext context, AstOperation operation) + { + var value = context.GetFP64(operation.GetSource(0)); + var vector = context.GlslUnpackDouble2x32(context.TypeVector(context.TypeU32(), 2), value); + var result = context.CompositeExtract(context.TypeU32(), vector, operation.Index); + + return new OperationResult(AggregateType.U32, result); + } + + private static OperationResult GenerateUnpackHalf2x16(CodeGenContext context, AstOperation operation) + { + var value = context.GetU32(operation.GetSource(0)); + var vector = context.GlslUnpackHalf2x16(context.TypeVector(context.TypeFP32(), 2), value); + var result = context.CompositeExtract(context.TypeFP32(), vector, operation.Index); + + return new OperationResult(AggregateType.FP32, result); + } + + private static OperationResult GenerateVoteAll(CodeGenContext context, AstOperation operation) + { + var result = context.SubgroupAllKHR(context.TypeBool(), context.Get(AggregateType.Bool, operation.GetSource(0))); + return new OperationResult(AggregateType.Bool, result); + } + + private static OperationResult GenerateVoteAllEqual(CodeGenContext context, AstOperation operation) + { + var result = 
context.SubgroupAllEqualKHR(context.TypeBool(), context.Get(AggregateType.Bool, operation.GetSource(0))); + return new OperationResult(AggregateType.Bool, result); + } + + private static OperationResult GenerateVoteAny(CodeGenContext context, AstOperation operation) + { + var result = context.SubgroupAnyKHR(context.TypeBool(), context.Get(AggregateType.Bool, operation.GetSource(0))); + return new OperationResult(AggregateType.Bool, result); + } + + private static OperationResult GenerateCompare( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitF, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitI) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + SpvInstruction result; + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + result = emitF(context.TypeBool(), context.GetFP64(src1), context.GetFP64(src2)); + } + else if (operation.Inst.HasFlag(Instruction.FP32)) + { + result = emitF(context.TypeBool(), context.GetFP32(src1), context.GetFP32(src2)); + } + else + { + result = emitI(context.TypeBool(), context.GetS32(src1), context.GetS32(src2)); + } + + return new OperationResult(AggregateType.Bool, result); + } + + private static OperationResult GenerateCompareU32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + var result = emitU(context.TypeBool(), context.GetU32(src1), context.GetU32(src2)); + + return new OperationResult(AggregateType.Bool, result); + } + + private static OperationResult GenerateAtomicMemoryBinary( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU) + { + var value = context.GetU32(operation.GetSource(2)); + + SpvInstruction elemPointer; + Instruction mr = operation.Inst & Instruction.MrMask; + + if (mr == Instruction.MrStorage) + { + elemPointer = GetStorageElemPointer(context, operation); + } + else if (mr == Instruction.MrShared) + { + var offset = context.GetU32(operation.GetSource(0)); + elemPointer = 
context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), context.SharedMemory, offset); + } + else + { + throw new InvalidOperationException($"Invalid storage class \"{mr}\"."); + } + + var one = context.Constant(context.TypeU32(), 1); + var zero = context.Constant(context.TypeU32(), 0); + + return new OperationResult(AggregateType.U32, emitU(context.TypeU32(), elemPointer, one, zero, value)); + } + + private static OperationResult GenerateAtomicMemoryCas(CodeGenContext context, AstOperation operation) + { + var value0 = context.GetU32(operation.GetSource(2)); + var value1 = context.GetU32(operation.GetSource(3)); + + SpvInstruction elemPointer; + Instruction mr = operation.Inst & Instruction.MrMask; + + if (mr == Instruction.MrStorage) + { + elemPointer = GetStorageElemPointer(context, operation); + } + else if (mr == Instruction.MrShared) + { + var offset = context.GetU32(operation.GetSource(0)); + elemPointer = context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), context.SharedMemory, offset); + } + else + { + throw new InvalidOperationException($"Invalid storage class \"{mr}\"."); + } + + var one = context.Constant(context.TypeU32(), 1); + var zero = context.Constant(context.TypeU32(), 0); + + return new OperationResult(AggregateType.U32, context.AtomicCompareExchange(context.TypeU32(), elemPointer, one, zero, zero, value1, value0)); + } + + private static void GenerateStoreSharedSmallInt(CodeGenContext context, AstOperation operation, int bitSize) + { + var offset = context.Get(AggregateType.U32, operation.GetSource(0)); + var value = context.Get(AggregateType.U32, operation.GetSource(1)); + + var wordOffset = context.ShiftRightLogical(context.TypeU32(), offset, context.Constant(context.TypeU32(), 2)); + var bitOffset = context.BitwiseAnd(context.TypeU32(), offset, context.Constant(context.TypeU32(), 3)); + bitOffset = context.ShiftLeftLogical(context.TypeU32(), bitOffset, 
context.Constant(context.TypeU32(), 3)); + + var memory = context.SharedMemory; + + var elemPointer = context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), memory, wordOffset); + + GenerateStoreSmallInt(context, elemPointer, bitOffset, value, bitSize); + } + + private static void GenerateStoreStorageSmallInt(CodeGenContext context, AstOperation operation, int bitSize) + { + var i0 = context.Get(AggregateType.S32, operation.GetSource(0)); + var offset = context.Get(AggregateType.U32, operation.GetSource(1)); + var value = context.Get(AggregateType.U32, operation.GetSource(2)); + + var wordOffset = context.ShiftRightLogical(context.TypeU32(), offset, context.Constant(context.TypeU32(), 2)); + var bitOffset = context.BitwiseAnd(context.TypeU32(), offset, context.Constant(context.TypeU32(), 3)); + bitOffset = context.ShiftLeftLogical(context.TypeU32(), bitOffset, context.Constant(context.TypeU32(), 3)); + + var sbVariable = context.StorageBuffersArray; + + var i1 = context.Constant(context.TypeS32(), 0); + + var elemPointer = context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeU32()), sbVariable, i0, i1, wordOffset); + + GenerateStoreSmallInt(context, elemPointer, bitOffset, value, bitSize); + } + + private static void GenerateStoreSmallInt( + CodeGenContext context, + SpvInstruction elemPointer, + SpvInstruction bitOffset, + SpvInstruction value, + int bitSize) + { + var loopStart = context.Label(); + var loopEnd = context.Label(); + + context.Branch(loopStart); + context.AddLabel(loopStart); + + var oldValue = context.Load(context.TypeU32(), elemPointer); + var newValue = context.BitFieldInsert(context.TypeU32(), oldValue, value, bitOffset, context.Constant(context.TypeU32(), bitSize)); + + var one = context.Constant(context.TypeU32(), 1); + var zero = context.Constant(context.TypeU32(), 0); + + var result = context.AtomicCompareExchange(context.TypeU32(), elemPointer, one, zero, zero, newValue, oldValue); + var 
failed = context.INotEqual(context.TypeBool(), result, oldValue); + + context.LoopMerge(loopEnd, loopStart, LoopControlMask.MaskNone); + context.BranchConditional(failed, loopStart, loopEnd); + + context.AddLabel(loopEnd); + } + + private static SpvInstruction GetStorageElemPointer(CodeGenContext context, AstOperation operation) + { + var sbVariable = context.StorageBuffersArray; + var i0 = context.Get(AggregateType.S32, operation.GetSource(0)); + var i1 = context.Constant(context.TypeS32(), 0); + var i2 = context.Get(AggregateType.S32, operation.GetSource(1)); + + return context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeU32()), sbVariable, i0, i1, i2); + } + + private static OperationResult GenerateUnary( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emitF, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emitI) + { + var source = operation.GetSource(0); + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + return new OperationResult(AggregateType.FP64, emitF(context.TypeFP64(), context.GetFP64(source))); + } + else if (operation.Inst.HasFlag(Instruction.FP32)) + { + return new OperationResult(AggregateType.FP32, emitF(context.TypeFP32(), context.GetFP32(source))); + } + else + { + return new OperationResult(AggregateType.S32, emitI(context.TypeS32(), context.GetS32(source))); + } + } + + private static OperationResult GenerateUnaryBool( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emitB) + { + var source = operation.GetSource(0); + return new OperationResult(AggregateType.Bool, emitB(context.TypeBool(), context.Get(AggregateType.Bool, source))); + } + + private static OperationResult GenerateUnaryFP32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emit) + { + var source = operation.GetSource(0); + return new OperationResult(AggregateType.FP32, emit(context.TypeFP32(), context.GetFP32(source))); + } + + private static OperationResult GenerateUnaryS32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emitS) + { + var source = 
operation.GetSource(0); + return new OperationResult(AggregateType.S32, emitS(context.TypeS32(), context.GetS32(source))); + } + + private static OperationResult GenerateBinary( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitF, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitI) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + var result = emitF(context.TypeFP64(), context.GetFP64(src1), context.GetFP64(src2)); + context.Decorate(result, Decoration.NoContraction); + return new OperationResult(AggregateType.FP64, result); + } + else if (operation.Inst.HasFlag(Instruction.FP32)) + { + var result = emitF(context.TypeFP32(), context.GetFP32(src1), context.GetFP32(src2)); + context.Decorate(result, Decoration.NoContraction); + return new OperationResult(AggregateType.FP32, result); + } + else + { + return new OperationResult(AggregateType.S32, emitI(context.TypeS32(), context.GetS32(src1), context.GetS32(src2))); + } + } + + private static OperationResult GenerateBinaryBool( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitB) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + return new OperationResult(AggregateType.Bool, emitB(context.TypeBool(), context.Get(AggregateType.Bool, src1), context.Get(AggregateType.Bool, src2))); + } + + private static OperationResult GenerateBinaryS32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitS) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + return new OperationResult(AggregateType.S32, emitS(context.TypeS32(), context.GetS32(src1), context.GetS32(src2))); + } + + private static OperationResult GenerateBinaryU32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + return new OperationResult(AggregateType.U32, emitU(context.TypeU32(), context.GetU32(src1), context.GetU32(src2))); + } 
+ + private static OperationResult GenerateTernary( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitF, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitI) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + var src3 = operation.GetSource(2); + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + var result = emitF(context.TypeFP64(), context.GetFP64(src1), context.GetFP64(src2), context.GetFP64(src3)); + context.Decorate(result, Decoration.NoContraction); + return new OperationResult(AggregateType.FP64, result); + } + else if (operation.Inst.HasFlag(Instruction.FP32)) + { + var result = emitF(context.TypeFP32(), context.GetFP32(src1), context.GetFP32(src2), context.GetFP32(src3)); + context.Decorate(result, Decoration.NoContraction); + return new OperationResult(AggregateType.FP32, result); + } + else + { + return new OperationResult(AggregateType.S32, emitI(context.TypeS32(), context.GetS32(src1), context.GetS32(src2), context.GetS32(src3))); + } + } + + private static OperationResult GenerateTernaryS32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitS) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + var src3 = operation.GetSource(2); + + return new OperationResult(AggregateType.S32, emitS( + context.TypeS32(), + context.GetS32(src1), + context.GetS32(src2), + context.GetS32(src3))); + } + + private static OperationResult GenerateTernaryU32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + var src3 = operation.GetSource(2); + + return new OperationResult(AggregateType.U32, emitU( + context.TypeU32(), + context.GetU32(src1), + context.GetU32(src2), + context.GetU32(src3))); + } + + private static OperationResult GenerateQuaternaryS32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitS) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + var src3 = operation.GetSource(2); + 
var src4 = operation.GetSource(3); + + return new OperationResult(AggregateType.S32, emitS( + context.TypeS32(), + context.GetS32(src1), + context.GetS32(src2), + context.GetS32(src3), + context.GetS32(src4))); + } + } +} diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/OperationResult.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/OperationResult.cs new file mode 100644 index 000000000..f432f1c41 --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/OperationResult.cs @@ -0,0 +1,19 @@ +using Ryujinx.Graphics.Shader.Translation; +using Spv.Generator; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + struct OperationResult + { + public static OperationResult Invalid => new OperationResult(AggregateType.Invalid, null); + + public AggregateType Type { get; } + public Instruction Value { get; } + + public OperationResult(AggregateType type, Instruction value) + { + Type = type; + Value = value; + } + } +} diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/ScalingHelpers.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/ScalingHelpers.cs new file mode 100644 index 000000000..8503771c3 --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/ScalingHelpers.cs @@ -0,0 +1,227 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using static Spv.Specification; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + using SpvInstruction = Spv.Generator.Instruction; + + static class ScalingHelpers + { + public static SpvInstruction ApplyScaling( + CodeGenContext context, + AstTextureOperation texOp, + SpvInstruction vector, + bool intCoords, + bool isBindless, + bool isIndexed, + bool isArray, + int pCount) + { + if (intCoords) + { + if (context.Config.Stage.SupportsRenderScale() && + !isBindless && + !isIndexed) + { + int index = texOp.Inst == Instruction.ImageLoad + ? 
context.Config.GetTextureDescriptors().Length + context.Config.FindImageDescriptorIndex(texOp) + : context.Config.FindTextureDescriptorIndex(texOp); + + if (pCount == 3 && isArray) + { + return ApplyScaling2DArray(context, vector, index); + } + else if (pCount == 2 && !isArray) + { + return ApplyScaling2D(context, vector, index); + } + } + } + + return vector; + } + + private static SpvInstruction ApplyScaling2DArray(CodeGenContext context, SpvInstruction vector, int index) + { + // The array index is not scaled, just x and y. + var vectorXY = context.VectorShuffle(context.TypeVector(context.TypeS32(), 2), vector, vector, 0, 1); + var vectorZ = context.CompositeExtract(context.TypeS32(), vector, 2); + var vectorXYScaled = ApplyScaling2D(context, vectorXY, index); + var vectorScaled = context.CompositeConstruct(context.TypeVector(context.TypeS32(), 3), vectorXYScaled, vectorZ); + + return vectorScaled; + } + + private static SpvInstruction ApplyScaling2D(CodeGenContext context, SpvInstruction vector, int index) + { + var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32()); + var fieldIndex = context.Constant(context.TypeU32(), 4); + var scaleIndex = context.Constant(context.TypeU32(), index); + + if (context.Config.Stage == ShaderStage.Vertex) + { + var scaleCountPointerType = context.TypePointer(StorageClass.Uniform, context.TypeS32()); + var scaleCountElemPointer = context.AccessChain(scaleCountPointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 3)); + var scaleCount = context.Load(context.TypeS32(), scaleCountElemPointer); + + scaleIndex = context.IAdd(context.TypeU32(), scaleIndex, scaleCount); + } + + scaleIndex = context.IAdd(context.TypeU32(), scaleIndex, context.Constant(context.TypeU32(), 1)); + + var scaleElemPointer = context.AccessChain(pointerType, context.SupportBuffer, fieldIndex, scaleIndex); + var scale = context.Load(context.TypeFP32(), scaleElemPointer); + + var ivector2Type = 
context.TypeVector(context.TypeS32(), 2); + var localVector = context.CoordTemp; + + var passthrough = context.FOrdEqual(context.TypeBool(), scale, context.Constant(context.TypeFP32(), 1f)); + + var mergeLabel = context.Label(); + + if (context.Config.Stage == ShaderStage.Fragment) + { + var scaledInterpolatedLabel = context.Label(); + var scaledNoInterpolationLabel = context.Label(); + + var needsInterpolation = context.FOrdLessThan(context.TypeBool(), scale, context.Constant(context.TypeFP32(), 0f)); + + context.SelectionMerge(mergeLabel, SelectionControlMask.MaskNone); + context.BranchConditional(needsInterpolation, scaledInterpolatedLabel, scaledNoInterpolationLabel); + + // scale < 0.0 + context.AddLabel(scaledInterpolatedLabel); + + ApplyScalingInterpolated(context, localVector, vector, scale); + context.Branch(mergeLabel); + + // scale >= 0.0 + context.AddLabel(scaledNoInterpolationLabel); + + ApplyScalingNoInterpolation(context, localVector, vector, scale); + context.Branch(mergeLabel); + + context.AddLabel(mergeLabel); + + var passthroughLabel = context.Label(); + var finalMergeLabel = context.Label(); + + context.SelectionMerge(finalMergeLabel, SelectionControlMask.MaskNone); + context.BranchConditional(passthrough, passthroughLabel, finalMergeLabel); + + context.AddLabel(passthroughLabel); + + context.Store(localVector, vector); + context.Branch(finalMergeLabel); + + context.AddLabel(finalMergeLabel); + + return context.Load(ivector2Type, localVector); + } + else + { + var passthroughLabel = context.Label(); + var scaledLabel = context.Label(); + + context.SelectionMerge(mergeLabel, SelectionControlMask.MaskNone); + context.BranchConditional(passthrough, passthroughLabel, scaledLabel); + + // scale == 1.0 + context.AddLabel(passthroughLabel); + + context.Store(localVector, vector); + context.Branch(mergeLabel); + + // scale != 1.0 + context.AddLabel(scaledLabel); + + ApplyScalingNoInterpolation(context, localVector, vector, scale); + 
context.Branch(mergeLabel); + + context.AddLabel(mergeLabel); + + return context.Load(ivector2Type, localVector); + } + } + + private static void ApplyScalingInterpolated(CodeGenContext context, SpvInstruction output, SpvInstruction vector, SpvInstruction scale) + { + var vector2Type = context.TypeVector(context.TypeFP32(), 2); + + var scaleNegated = context.FNegate(context.TypeFP32(), scale); + var scaleVector = context.CompositeConstruct(vector2Type, scaleNegated, scaleNegated); + + var vectorFloat = context.ConvertSToF(vector2Type, vector); + var vectorScaled = context.VectorTimesScalar(vector2Type, vectorFloat, scaleNegated); + + var fragCoordPointer = context.Inputs[AttributeConsts.PositionX]; + var fragCoord = context.Load(context.TypeVector(context.TypeFP32(), 4), fragCoordPointer); + var fragCoordXY = context.VectorShuffle(vector2Type, fragCoord, fragCoord, 0, 1); + + var scaleMod = context.FMod(vector2Type, fragCoordXY, scaleVector); + var vectorInterpolated = context.FAdd(vector2Type, vectorScaled, scaleMod); + + context.Store(output, context.ConvertFToS(context.TypeVector(context.TypeS32(), 2), vectorInterpolated)); + } + + private static void ApplyScalingNoInterpolation(CodeGenContext context, SpvInstruction output, SpvInstruction vector, SpvInstruction scale) + { + if (context.Config.Stage == ShaderStage.Vertex) + { + scale = context.GlslFAbs(context.TypeFP32(), scale); + } + + var vector2Type = context.TypeVector(context.TypeFP32(), 2); + + var vectorFloat = context.ConvertSToF(vector2Type, vector); + var vectorScaled = context.VectorTimesScalar(vector2Type, vectorFloat, scale); + + context.Store(output, context.ConvertFToS(context.TypeVector(context.TypeS32(), 2), vectorScaled)); + } + + public static SpvInstruction ApplyUnscaling( + CodeGenContext context, + AstTextureOperation texOp, + SpvInstruction size, + bool isBindless, + bool isIndexed) + { + if (context.Config.Stage.SupportsRenderScale() && + !isBindless && + !isIndexed) + { + int index = 
context.Config.FindTextureDescriptorIndex(texOp); + + var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32()); + var fieldIndex = context.Constant(context.TypeU32(), 4); + var scaleIndex = context.Constant(context.TypeU32(), index); + + if (context.Config.Stage == ShaderStage.Vertex) + { + var scaleCountPointerType = context.TypePointer(StorageClass.Uniform, context.TypeS32()); + var scaleCountElemPointer = context.AccessChain(scaleCountPointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 3)); + var scaleCount = context.Load(context.TypeS32(), scaleCountElemPointer); + + scaleIndex = context.IAdd(context.TypeU32(), scaleIndex, scaleCount); + } + + scaleIndex = context.IAdd(context.TypeU32(), scaleIndex, context.Constant(context.TypeU32(), 1)); + + var scaleElemPointer = context.AccessChain(pointerType, context.SupportBuffer, fieldIndex, scaleIndex); + var scale = context.GlslFAbs(context.TypeFP32(), context.Load(context.TypeFP32(), scaleElemPointer)); + + var passthrough = context.FOrdEqual(context.TypeBool(), scale, context.Constant(context.TypeFP32(), 1f)); + + var sizeFloat = context.ConvertSToF(context.TypeFP32(), size); + var sizeUnscaled = context.FDiv(context.TypeFP32(), sizeFloat, scale); + var sizeUnscaledInt = context.ConvertFToS(context.TypeS32(), sizeUnscaled); + + return context.Select(context.TypeS32(), passthrough, size, sizeUnscaledInt); + } + + return size; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvDelegates.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvDelegates.cs new file mode 100644 index 000000000..fa0341ee9 --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvDelegates.cs @@ -0,0 +1,226 @@ +using FuncUnaryInstruction = System.Func; +using FuncBinaryInstruction = System.Func; +using FuncTernaryInstruction = System.Func; +using FuncQuaternaryInstruction = System.Func; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + /// + /// 
Delegate cache for SPIR-V instruction generators. Avoids delegate allocation when passing generators as arguments. + /// + internal struct SpirvDelegates + { + // Unary + public readonly FuncUnaryInstruction GlslFAbs; + public readonly FuncUnaryInstruction GlslSAbs; + public readonly FuncUnaryInstruction GlslCeil; + public readonly FuncUnaryInstruction GlslCos; + public readonly FuncUnaryInstruction GlslExp2; + public readonly FuncUnaryInstruction GlslFloor; + public readonly FuncUnaryInstruction GlslLog2; + public readonly FuncUnaryInstruction FNegate; + public readonly FuncUnaryInstruction SNegate; + public readonly FuncUnaryInstruction GlslInverseSqrt; + public readonly FuncUnaryInstruction GlslRoundEven; + public readonly FuncUnaryInstruction GlslSin; + public readonly FuncUnaryInstruction GlslSqrt; + public readonly FuncUnaryInstruction GlslTrunc; + + // UnaryBool + public readonly FuncUnaryInstruction LogicalNot; + + // UnaryFP32 + public readonly FuncUnaryInstruction DPdx; + public readonly FuncUnaryInstruction DPdy; + + // UnaryS32 + public readonly FuncUnaryInstruction BitCount; + public readonly FuncUnaryInstruction BitReverse; + public readonly FuncUnaryInstruction Not; + + // Compare + public readonly FuncBinaryInstruction FOrdEqual; + public readonly FuncBinaryInstruction IEqual; + public readonly FuncBinaryInstruction FOrdGreaterThan; + public readonly FuncBinaryInstruction SGreaterThan; + public readonly FuncBinaryInstruction FOrdGreaterThanEqual; + public readonly FuncBinaryInstruction SGreaterThanEqual; + public readonly FuncBinaryInstruction FOrdLessThan; + public readonly FuncBinaryInstruction SLessThan; + public readonly FuncBinaryInstruction FOrdLessThanEqual; + public readonly FuncBinaryInstruction SLessThanEqual; + public readonly FuncBinaryInstruction FOrdNotEqual; + public readonly FuncBinaryInstruction INotEqual; + + // CompareU32 + public readonly FuncBinaryInstruction UGreaterThanEqual; + public readonly FuncBinaryInstruction 
UGreaterThan; + public readonly FuncBinaryInstruction ULessThanEqual; + public readonly FuncBinaryInstruction ULessThan; + + // Binary + public readonly FuncBinaryInstruction FAdd; + public readonly FuncBinaryInstruction IAdd; + public readonly FuncBinaryInstruction FDiv; + public readonly FuncBinaryInstruction SDiv; + public readonly FuncBinaryInstruction GlslFMax; + public readonly FuncBinaryInstruction GlslSMax; + public readonly FuncBinaryInstruction GlslFMin; + public readonly FuncBinaryInstruction GlslSMin; + public readonly FuncBinaryInstruction FMul; + public readonly FuncBinaryInstruction IMul; + public readonly FuncBinaryInstruction FSub; + public readonly FuncBinaryInstruction ISub; + + // BinaryBool + public readonly FuncBinaryInstruction LogicalAnd; + public readonly FuncBinaryInstruction LogicalNotEqual; + public readonly FuncBinaryInstruction LogicalOr; + + // BinaryS32 + public readonly FuncBinaryInstruction BitwiseAnd; + public readonly FuncBinaryInstruction BitwiseXor; + public readonly FuncBinaryInstruction BitwiseOr; + public readonly FuncBinaryInstruction ShiftLeftLogical; + public readonly FuncBinaryInstruction ShiftRightArithmetic; + public readonly FuncBinaryInstruction ShiftRightLogical; + + // BinaryU32 + public readonly FuncBinaryInstruction GlslUMax; + public readonly FuncBinaryInstruction GlslUMin; + + // AtomicMemoryBinary + public readonly FuncQuaternaryInstruction AtomicIAdd; + public readonly FuncQuaternaryInstruction AtomicAnd; + public readonly FuncQuaternaryInstruction AtomicSMin; + public readonly FuncQuaternaryInstruction AtomicUMin; + public readonly FuncQuaternaryInstruction AtomicSMax; + public readonly FuncQuaternaryInstruction AtomicUMax; + public readonly FuncQuaternaryInstruction AtomicOr; + public readonly FuncQuaternaryInstruction AtomicExchange; + public readonly FuncQuaternaryInstruction AtomicXor; + + // Ternary + public readonly FuncTernaryInstruction GlslFClamp; + public readonly FuncTernaryInstruction GlslSClamp; 
+ public readonly FuncTernaryInstruction GlslFma; + + // TernaryS32 + public readonly FuncTernaryInstruction BitFieldSExtract; + public readonly FuncTernaryInstruction BitFieldUExtract; + + // TernaryU32 + public readonly FuncTernaryInstruction GlslUClamp; + + // QuaternaryS32 + public readonly FuncQuaternaryInstruction BitFieldInsert; + + public SpirvDelegates(CodeGenContext context) + { + // Unary + GlslFAbs = context.GlslFAbs; + GlslSAbs = context.GlslSAbs; + GlslCeil = context.GlslCeil; + GlslCos = context.GlslCos; + GlslExp2 = context.GlslExp2; + GlslFloor = context.GlslFloor; + GlslLog2 = context.GlslLog2; + FNegate = context.FNegate; + SNegate = context.SNegate; + GlslInverseSqrt = context.GlslInverseSqrt; + GlslRoundEven = context.GlslRoundEven; + GlslSin = context.GlslSin; + GlslSqrt = context.GlslSqrt; + GlslTrunc = context.GlslTrunc; + + // UnaryBool + LogicalNot = context.LogicalNot; + + // UnaryFP32 + DPdx = context.DPdx; + DPdy = context.DPdy; + + // UnaryS32 + BitCount = context.BitCount; + BitReverse = context.BitReverse; + Not = context.Not; + + // Compare + FOrdEqual = context.FOrdEqual; + IEqual = context.IEqual; + FOrdGreaterThan = context.FOrdGreaterThan; + SGreaterThan = context.SGreaterThan; + FOrdGreaterThanEqual = context.FOrdGreaterThanEqual; + SGreaterThanEqual = context.SGreaterThanEqual; + FOrdLessThan = context.FOrdLessThan; + SLessThan = context.SLessThan; + FOrdLessThanEqual = context.FOrdLessThanEqual; + SLessThanEqual = context.SLessThanEqual; + FOrdNotEqual = context.FOrdNotEqual; + INotEqual = context.INotEqual; + + // CompareU32 + UGreaterThanEqual = context.UGreaterThanEqual; + UGreaterThan = context.UGreaterThan; + ULessThanEqual = context.ULessThanEqual; + ULessThan = context.ULessThan; + + // Binary + FAdd = context.FAdd; + IAdd = context.IAdd; + FDiv = context.FDiv; + SDiv = context.SDiv; + GlslFMax = context.GlslFMax; + GlslSMax = context.GlslSMax; + GlslFMin = context.GlslFMin; + GlslSMin = context.GlslSMin; + FMul = 
context.FMul; + IMul = context.IMul; + FSub = context.FSub; + ISub = context.ISub; + + // BinaryBool + LogicalAnd = context.LogicalAnd; + LogicalNotEqual = context.LogicalNotEqual; + LogicalOr = context.LogicalOr; + + // BinaryS32 + BitwiseAnd = context.BitwiseAnd; + BitwiseXor = context.BitwiseXor; + BitwiseOr = context.BitwiseOr; + ShiftLeftLogical = context.ShiftLeftLogical; + ShiftRightArithmetic = context.ShiftRightArithmetic; + ShiftRightLogical = context.ShiftRightLogical; + + // BinaryU32 + GlslUMax = context.GlslUMax; + GlslUMin = context.GlslUMin; + + // AtomicMemoryBinary + AtomicIAdd = context.AtomicIAdd; + AtomicAnd = context.AtomicAnd; + AtomicSMin = context.AtomicSMin; + AtomicUMin = context.AtomicUMin; + AtomicSMax = context.AtomicSMax; + AtomicUMax = context.AtomicUMax; + AtomicOr = context.AtomicOr; + AtomicExchange = context.AtomicExchange; + AtomicXor = context.AtomicXor; + + // Ternary + GlslFClamp = context.GlslFClamp; + GlslSClamp = context.GlslSClamp; + GlslFma = context.GlslFma; + + // TernaryS32 + BitFieldSExtract = context.BitFieldSExtract; + BitFieldUExtract = context.BitFieldUExtract; + + // TernaryU32 + GlslUClamp = context.GlslUClamp; + + // QuaternaryS32 + BitFieldInsert = context.BitFieldInsert; + } + } +} diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs new file mode 100644 index 000000000..0235fde56 --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs @@ -0,0 +1,413 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Collections.Generic; +using static Spv.Specification; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + using SpvInstruction = Spv.Generator.Instruction; + using SpvLiteralInteger = Spv.Generator.LiteralInteger; + + using SpvInstructionPool = 
Spv.Generator.GeneratorPool; + using SpvLiteralIntegerPool = Spv.Generator.GeneratorPool; + + static class SpirvGenerator + { + // Resource pools for Spirv generation. Note: Increase count when more threads are being used. + private const int GeneratorPoolCount = 1; + private static ObjectPool InstructionPool; + private static ObjectPool IntegerPool; + private static object PoolLock; + + static SpirvGenerator() + { + InstructionPool = new (() => new SpvInstructionPool(), GeneratorPoolCount); + IntegerPool = new (() => new SpvLiteralIntegerPool(), GeneratorPoolCount); + PoolLock = new object(); + } + + private const HelperFunctionsMask NeedsInvocationIdMask = + HelperFunctionsMask.Shuffle | + HelperFunctionsMask.ShuffleDown | + HelperFunctionsMask.ShuffleUp | + HelperFunctionsMask.ShuffleXor | + HelperFunctionsMask.SwizzleAdd; + + public static byte[] Generate(StructuredProgramInfo info, ShaderConfig config) + { + SpvInstructionPool instPool; + SpvLiteralIntegerPool integerPool; + + lock (PoolLock) + { + instPool = InstructionPool.Allocate(); + integerPool = IntegerPool.Allocate(); + } + + CodeGenContext context = new CodeGenContext(info, config, instPool, integerPool); + + context.AddCapability(Capability.GroupNonUniformBallot); + context.AddCapability(Capability.ImageBuffer); + context.AddCapability(Capability.ImageQuery); + context.AddCapability(Capability.SampledBuffer); + context.AddCapability(Capability.SubgroupBallotKHR); + context.AddCapability(Capability.SubgroupVoteKHR); + + if (config.TransformFeedbackEnabled && config.Stage != ShaderStage.Fragment) + { + context.AddCapability(Capability.TransformFeedback); + } + + if (config.Stage == ShaderStage.Fragment && context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock()) + { + context.AddCapability(Capability.FragmentShaderPixelInterlockEXT); + context.AddExtension("SPV_EXT_fragment_shader_interlock"); + } + else if (config.Stage == ShaderStage.Geometry) + { + 
context.AddCapability(Capability.Geometry); + + if (config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough()) + { + context.AddExtension("SPV_NV_geometry_shader_passthrough"); + context.AddCapability(Capability.GeometryShaderPassthroughNV); + } + } + else if (config.Stage == ShaderStage.TessellationControl || config.Stage == ShaderStage.TessellationEvaluation) + { + context.AddCapability(Capability.Tessellation); + } + + context.AddExtension("SPV_KHR_shader_ballot"); + context.AddExtension("SPV_KHR_subgroup_vote"); + + Declarations.DeclareAll(context, info); + + if ((info.HelperFunctionsMask & NeedsInvocationIdMask) != 0) + { + Declarations.DeclareInvocationId(context); + } + + for (int funcIndex = 0; funcIndex < info.Functions.Count; funcIndex++) + { + var function = info.Functions[funcIndex]; + var retType = context.GetType(function.ReturnType.Convert()); + + var funcArgs = new SpvInstruction[function.InArguments.Length + function.OutArguments.Length]; + + for (int argIndex = 0; argIndex < funcArgs.Length; argIndex++) + { + var argType = context.GetType(function.GetArgumentType(argIndex).Convert()); + var argPointerType = context.TypePointer(StorageClass.Function, argType); + funcArgs[argIndex] = argPointerType; + } + + var funcType = context.TypeFunction(retType, false, funcArgs); + var spvFunc = context.Function(retType, FunctionControlMask.MaskNone, funcType); + + context.DeclareFunction(funcIndex, function, spvFunc); + } + + for (int funcIndex = 0; funcIndex < info.Functions.Count; funcIndex++) + { + Generate(context, info, funcIndex); + } + + byte[] result = context.Generate(); + + lock (PoolLock) + { + InstructionPool.Release(instPool); + IntegerPool.Release(integerPool); + } + + return result; + } + + private static void Generate(CodeGenContext context, StructuredProgramInfo info, int funcIndex) + { + var function = info.Functions[funcIndex]; + + (_, var spvFunc) = context.GetFunction(funcIndex); + + 
context.AddFunction(spvFunc); + context.StartFunction(); + + Declarations.DeclareParameters(context, function); + + context.EnterBlock(function.MainBlock); + + Declarations.DeclareLocals(context, function); + Declarations.DeclareLocalForArgs(context, info.Functions); + + Generate(context, function.MainBlock); + + // Functions must always end with a return. + if (!(function.MainBlock.Last is AstOperation operation) || + (operation.Inst != Instruction.Return && operation.Inst != Instruction.Discard)) + { + context.Return(); + } + + context.FunctionEnd(); + + if (funcIndex == 0) + { + context.AddEntryPoint(context.Config.Stage.Convert(), spvFunc, "main", context.GetMainInterface()); + + if (context.Config.Stage == ShaderStage.TessellationControl) + { + context.AddExecutionMode(spvFunc, ExecutionMode.OutputVertices, (SpvLiteralInteger)context.Config.ThreadsPerInputPrimitive); + } + else if (context.Config.Stage == ShaderStage.TessellationEvaluation) + { + switch (context.Config.GpuAccessor.QueryTessPatchType()) + { + case TessPatchType.Isolines: + context.AddExecutionMode(spvFunc, ExecutionMode.Isolines); + break; + case TessPatchType.Triangles: + context.AddExecutionMode(spvFunc, ExecutionMode.Triangles); + break; + case TessPatchType.Quads: + context.AddExecutionMode(spvFunc, ExecutionMode.Quads); + break; + } + + switch (context.Config.GpuAccessor.QueryTessSpacing()) + { + case TessSpacing.EqualSpacing: + context.AddExecutionMode(spvFunc, ExecutionMode.SpacingEqual); + break; + case TessSpacing.FractionalEventSpacing: + context.AddExecutionMode(spvFunc, ExecutionMode.SpacingFractionalEven); + break; + case TessSpacing.FractionalOddSpacing: + context.AddExecutionMode(spvFunc, ExecutionMode.SpacingFractionalOdd); + break; + } + + if (context.Config.GpuAccessor.QueryTessCw()) + { + context.AddExecutionMode(spvFunc, ExecutionMode.VertexOrderCw); + } + else + { + context.AddExecutionMode(spvFunc, ExecutionMode.VertexOrderCcw); + } + } + else if (context.Config.Stage == 
ShaderStage.Geometry) + { + switch (context.Config.GpuAccessor.QueryPrimitiveTopology()) + { + case InputTopology.Points: + context.AddExecutionMode(spvFunc, ExecutionMode.InputPoints); + break; + case InputTopology.Lines: + context.AddExecutionMode(spvFunc, ExecutionMode.InputLines); + break; + case InputTopology.LinesAdjacency: + context.AddExecutionMode(spvFunc, ExecutionMode.InputLinesAdjacency); + break; + case InputTopology.Triangles: + context.AddExecutionMode(spvFunc, ExecutionMode.Triangles); + break; + case InputTopology.TrianglesAdjacency: + context.AddExecutionMode(spvFunc, ExecutionMode.InputTrianglesAdjacency); + break; + } + + context.AddExecutionMode(spvFunc, ExecutionMode.Invocations, (SpvLiteralInteger)context.Config.ThreadsPerInputPrimitive); + + context.AddExecutionMode(spvFunc, context.Config.OutputTopology switch + { + OutputTopology.PointList => ExecutionMode.OutputPoints, + OutputTopology.LineStrip => ExecutionMode.OutputLineStrip, + OutputTopology.TriangleStrip => ExecutionMode.OutputTriangleStrip, + _ => throw new InvalidOperationException($"Invalid output topology \"{context.Config.OutputTopology}\".") + }); + + int maxOutputVertices = context.Config.GpPassthrough ? context.InputVertices : context.Config.MaxOutputVertices; + + context.AddExecutionMode(spvFunc, ExecutionMode.OutputVertices, (SpvLiteralInteger)maxOutputVertices); + } + else if (context.Config.Stage == ShaderStage.Fragment) + { + context.AddExecutionMode(spvFunc, context.Config.Options.TargetApi == TargetApi.Vulkan + ? 
ExecutionMode.OriginUpperLeft + : ExecutionMode.OriginLowerLeft); + + if (context.Outputs.ContainsKey(AttributeConsts.FragmentOutputDepth)) + { + context.AddExecutionMode(spvFunc, ExecutionMode.DepthReplacing); + } + + if (context.Config.GpuAccessor.QueryEarlyZForce()) + { + context.AddExecutionMode(spvFunc, ExecutionMode.EarlyFragmentTests); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.FSI) != 0 && + context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock()) + { + context.AddExecutionMode(spvFunc, ExecutionMode.PixelInterlockOrderedEXT); + } + } + else if (context.Config.Stage == ShaderStage.Compute) + { + var localSizeX = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeX(); + var localSizeY = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeY(); + var localSizeZ = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeZ(); + + context.AddExecutionMode( + spvFunc, + ExecutionMode.LocalSize, + localSizeX, + localSizeY, + localSizeZ); + } + + if (context.Config.TransformFeedbackEnabled && context.Config.Stage != ShaderStage.Fragment) + { + context.AddExecutionMode(spvFunc, ExecutionMode.Xfb); + } + } + } + + private static void Generate(CodeGenContext context, AstBlock block) + { + AstBlockVisitor visitor = new AstBlockVisitor(block); + + var loopTargets = new Dictionary(); + + context.LoopTargets = loopTargets; + + visitor.BlockEntered += (sender, e) => + { + AstBlock mergeBlock = e.Block.Parent; + + if (e.Block.Type == AstBlockType.If) + { + AstBlock ifTrueBlock = e.Block; + AstBlock ifFalseBlock; + + if (AstHelper.Next(e.Block) is AstBlock nextBlock && nextBlock.Type == AstBlockType.Else) + { + ifFalseBlock = nextBlock; + } + else + { + ifFalseBlock = mergeBlock; + } + + var condition = context.Get(AggregateType.Bool, e.Block.Condition); + + context.SelectionMerge(context.GetNextLabel(mergeBlock), SelectionControlMask.MaskNone); + context.BranchConditional(condition, 
context.GetNextLabel(ifTrueBlock), context.GetNextLabel(ifFalseBlock)); + } + else if (e.Block.Type == AstBlockType.DoWhile) + { + var continueTarget = context.Label(); + + loopTargets.Add(e.Block, (context.NewBlock(), continueTarget)); + + context.LoopMerge(context.GetNextLabel(mergeBlock), continueTarget, LoopControlMask.MaskNone); + context.Branch(context.GetFirstLabel(e.Block)); + } + + context.EnterBlock(e.Block); + }; + + visitor.BlockLeft += (sender, e) => + { + if (e.Block.Parent != null) + { + if (e.Block.Type == AstBlockType.DoWhile) + { + // This is a loop, we need to jump back to the loop header + // if the condition is true. + AstBlock mergeBlock = e.Block.Parent; + + (var loopTarget, var continueTarget) = loopTargets[e.Block]; + + context.Branch(continueTarget); + context.AddLabel(continueTarget); + + var condition = context.Get(AggregateType.Bool, e.Block.Condition); + + context.BranchConditional(condition, loopTarget, context.GetNextLabel(mergeBlock)); + } + else + { + // We only need a branch if the last instruction didn't + // already cause the program to exit or jump elsewhere. + bool lastIsCf = e.Block.Last is AstOperation lastOp && + (lastOp.Inst == Instruction.Discard || + lastOp.Inst == Instruction.LoopBreak || + lastOp.Inst == Instruction.LoopContinue || + lastOp.Inst == Instruction.Return); + + if (!lastIsCf) + { + context.Branch(context.GetNextLabel(e.Block.Parent)); + } + } + + bool hasElse = AstHelper.Next(e.Block) is AstBlock nextBlock && + (nextBlock.Type == AstBlockType.Else || + nextBlock.Type == AstBlockType.ElseIf); + + // Re-enter the parent block. 
+ if (e.Block.Parent != null && !hasElse) + { + context.EnterBlock(e.Block.Parent); + } + } + }; + + foreach (IAstNode node in visitor.Visit()) + { + if (node is AstAssignment assignment) + { + var dest = (AstOperand)assignment.Destination; + + if (dest.Type == OperandType.LocalVariable) + { + var source = context.Get(dest.VarType.Convert(), assignment.Source); + context.Store(context.GetLocalPointer(dest), source); + } + else if (dest.Type == OperandType.Attribute || dest.Type == OperandType.AttributePerPatch) + { + if (AttributeInfo.Validate(context.Config, dest.Value, isOutAttr: true)) + { + bool perPatch = dest.Type == OperandType.AttributePerPatch; + AggregateType elemType; + + var elemPointer = perPatch + ? context.GetAttributePerPatchElemPointer(dest.Value, true, out elemType) + : context.GetAttributeElemPointer(dest.Value, true, null, out elemType); + + context.Store(elemPointer, context.Get(elemType, assignment.Source)); + } + } + else if (dest.Type == OperandType.Argument) + { + var source = context.Get(dest.VarType.Convert(), assignment.Source); + context.Store(context.GetArgumentPointer(dest), source); + } + else + { + throw new NotImplementedException(dest.Type.ToString()); + } + } + else if (node is AstOperation operation) + { + Instructions.Generate(context, operation); + } + } + } + } +} diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/TextureMeta.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/TextureMeta.cs new file mode 100644 index 000000000..686259ad7 --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/TextureMeta.cs @@ -0,0 +1,33 @@ +using System; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + struct TextureMeta : IEquatable + { + public int CbufSlot { get; } + public int Handle { get; } + public TextureFormat Format { get; } + + public TextureMeta(int cbufSlot, int handle, TextureFormat format) + { + CbufSlot = cbufSlot; + Handle = handle; + Format = format; + } + + public override bool Equals(object obj) + { + return obj is 
TextureMeta other && Equals(other); + } + + public bool Equals(TextureMeta other) + { + return CbufSlot == other.CbufSlot && Handle == other.Handle && Format == other.Format; + } + + public override int GetHashCode() + { + return HashCode.Combine(CbufSlot, Handle, Format); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/Ryujinx.Graphics.Shader/IGpuAccessor.cs index 180fc1874..178c1042d 100644 --- a/Ryujinx.Graphics.Shader/IGpuAccessor.cs +++ b/Ryujinx.Graphics.Shader/IGpuAccessor.cs @@ -34,6 +34,21 @@ namespace Ryujinx.Graphics.Shader /// Span of the memory location ReadOnlySpan GetCode(ulong address, int minimumSize); + AlphaTestOp QueryAlphaTestCompare() + { + return AlphaTestOp.Always; + } + + float QueryAlphaTestReference() + { + return 0f; + } + + AttributeType QueryAttributeType(int location) + { + return AttributeType.Float; + } + /// /// Queries the binding number of a constant buffer. /// @@ -58,8 +73,9 @@ namespace Ryujinx.Graphics.Shader /// Queries the binding number of a texture. /// /// Texture index + /// Indicates if the texture is a buffer texture /// Binding number - int QueryBindingTexture(int index) + int QueryBindingTexture(int index, bool isBuffer) { return index; } @@ -68,8 +84,9 @@ namespace Ryujinx.Graphics.Shader /// Queries the binding number of an image. /// /// Image index + /// Indicates if the image is a buffer image /// Binding number - int QueryBindingImage(int index) + int QueryBindingImage(int index, bool isBuffer) { return index; } @@ -182,6 +199,15 @@ namespace Ryujinx.Graphics.Shader return false; } + /// + /// Queries host GPU geometry shader passthrough support. + /// + /// True if the GPU and driver supports geometry shader passthrough, false otherwise + bool QueryHostSupportsGeometryShaderPassthrough() + { + return true; + } + /// /// Queries host support for readable images without a explicit format declaration on the shader. 
/// @@ -218,6 +244,16 @@ namespace Ryujinx.Graphics.Shader return true; } + float QueryPointSize() + { + return 1f; + } + + bool QueryProgramPointSize() + { + return true; + } + /// /// Queries sampler type information. /// @@ -291,6 +327,11 @@ namespace Ryujinx.Graphics.Shader return TextureFormat.R8G8B8A8Unorm; } + bool QueryTransformDepthMinusOneToOne() + { + return false; + } + /// /// Queries transform feedback enable state. /// diff --git a/Ryujinx.Graphics.Shader/IntermediateRepresentation/BasicBlock.cs b/Ryujinx.Graphics.Shader/IntermediateRepresentation/BasicBlock.cs index 1f7d2b25e..2aca118b7 100644 --- a/Ryujinx.Graphics.Shader/IntermediateRepresentation/BasicBlock.cs +++ b/Ryujinx.Graphics.Shader/IntermediateRepresentation/BasicBlock.cs @@ -58,5 +58,34 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation { return Operations.Last?.Value; } + + public void Append(INode node) + { + INode lastOp = GetLastOp(); + + if (lastOp is Operation operation && IsControlFlowInst(operation.Inst)) + { + Operations.AddBefore(Operations.Last, node); + } + else + { + Operations.AddLast(node); + } + } + + private static bool IsControlFlowInst(Instruction inst) + { + switch (inst) + { + case Instruction.Branch: + case Instruction.BranchIfFalse: + case Instruction.BranchIfTrue: + case Instruction.Discard: + case Instruction.Return: + return true; + } + + return false; + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj index 81cc0caf3..eed27c94e 100644 --- a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj +++ b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj @@ -10,6 +10,7 @@ + diff --git a/Ryujinx.Graphics.Shader/SamplerType.cs b/Ryujinx.Graphics.Shader/SamplerType.cs index 286ae9d5d..d04b16b38 100644 --- a/Ryujinx.Graphics.Shader/SamplerType.cs +++ b/Ryujinx.Graphics.Shader/SamplerType.cs @@ -25,32 +25,28 @@ namespace Ryujinx.Graphics.Shader 
{ public static int GetDimensions(this SamplerType type) { - switch (type & SamplerType.Mask) + return (type & SamplerType.Mask) switch { - case SamplerType.Texture1D: return 1; - case SamplerType.TextureBuffer: return 1; - case SamplerType.Texture2D: return 2; - case SamplerType.Texture3D: return 3; - case SamplerType.TextureCube: return 3; - } - - throw new ArgumentException($"Invalid sampler type \"{type}\"."); + SamplerType.Texture1D => 1, + SamplerType.TextureBuffer => 1, + SamplerType.Texture2D => 2, + SamplerType.Texture3D => 3, + SamplerType.TextureCube => 3, + _ => throw new ArgumentException($"Invalid sampler type \"{type}\".") + }; } public static string ToGlslSamplerType(this SamplerType type) { - string typeName; - - switch (type & SamplerType.Mask) + string typeName = (type & SamplerType.Mask) switch { - case SamplerType.Texture1D: typeName = "sampler1D"; break; - case SamplerType.TextureBuffer: typeName = "samplerBuffer"; break; - case SamplerType.Texture2D: typeName = "sampler2D"; break; - case SamplerType.Texture3D: typeName = "sampler3D"; break; - case SamplerType.TextureCube: typeName = "samplerCube"; break; - - default: throw new ArgumentException($"Invalid sampler type \"{type}\"."); - } + SamplerType.Texture1D => "sampler1D", + SamplerType.TextureBuffer => "samplerBuffer", + SamplerType.Texture2D => "sampler2D", + SamplerType.Texture3D => "sampler3D", + SamplerType.TextureCube => "samplerCube", + _ => throw new ArgumentException($"Invalid sampler type \"{type}\".") + }; if ((type & SamplerType.Multisample) != 0) { @@ -72,18 +68,15 @@ namespace Ryujinx.Graphics.Shader public static string ToGlslImageType(this SamplerType type, VariableType componentType) { - string typeName; - - switch (type & SamplerType.Mask) + string typeName = (type & SamplerType.Mask) switch { - case SamplerType.Texture1D: typeName = "image1D"; break; - case SamplerType.TextureBuffer: typeName = "imageBuffer"; break; - case SamplerType.Texture2D: typeName = "image2D"; 
break; - case SamplerType.Texture3D: typeName = "image3D"; break; - case SamplerType.TextureCube: typeName = "imageCube"; break; - - default: throw new ArgumentException($"Invalid sampler type \"{type}\"."); - } + SamplerType.Texture1D => "image1D", + SamplerType.TextureBuffer => "imageBuffer", + SamplerType.Texture2D => "image2D", + SamplerType.Texture3D => "image3D", + SamplerType.TextureCube => "imageCube", + _ => throw new ArgumentException($"Invalid sampler type \"{type}\".") + }; if ((type & SamplerType.Multisample) != 0) { diff --git a/Ryujinx.Graphics.Shader/ShaderProgram.cs b/Ryujinx.Graphics.Shader/ShaderProgram.cs index 29fff21e6..d790c831c 100644 --- a/Ryujinx.Graphics.Shader/ShaderProgram.cs +++ b/Ryujinx.Graphics.Shader/ShaderProgram.cs @@ -29,6 +29,7 @@ namespace Ryujinx.Graphics.Shader public void Prepend(string line) { + System.Console.WriteLine("prepend " + line); Code = line + Environment.NewLine + Code; } } diff --git a/Ryujinx.Graphics.Shader/StructuredIr/AstBlock.cs b/Ryujinx.Graphics.Shader/StructuredIr/AstBlock.cs index fdef87de5..2f34bee83 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/AstBlock.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/AstBlock.cs @@ -32,6 +32,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr private LinkedList _nodes; public IAstNode First => _nodes.First?.Value; + public IAstNode Last => _nodes.Last?.Value; public int Count => _nodes.Count; diff --git a/Ryujinx.Graphics.Shader/StructuredIr/AstOptimizer.cs b/Ryujinx.Graphics.Shader/StructuredIr/AstOptimizer.cs index c4d8370c2..b71ae2c41 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/AstOptimizer.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/AstOptimizer.cs @@ -15,7 +15,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr // When debug mode is enabled, we disable expression propagation // (this makes comparison with the disassembly easier). 
- if ((context.Config.Options.Flags & TranslationFlags.DebugMode) == 0) + if (!context.Config.Options.Flags.HasFlag(TranslationFlags.DebugMode)) { AstBlockVisitor visitor = new AstBlockVisitor(mainBlock); diff --git a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs index 3dfd025b5..d45f8d4ee 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs @@ -15,6 +15,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr ShuffleXor = 1 << 7, StoreSharedSmallInt = 1 << 8, StoreStorageSmallInt = 1 << 9, - SwizzleAdd = 1 << 10 + SwizzleAdd = 1 << 10, + FSI = 1 << 11 } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/StructuredIr/PhiFunctions.cs b/Ryujinx.Graphics.Shader/StructuredIr/PhiFunctions.cs index 53391b626..541ca298e 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/PhiFunctions.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/PhiFunctions.cs @@ -17,7 +17,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr { LinkedListNode nextNode = node.Next; - if (!(node.Value is PhiNode phi)) + if (node.Value is not PhiNode phi) { node = nextNode; @@ -32,7 +32,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr Operation copyOp = new Operation(Instruction.Copy, phi.Dest, src); - AddBeforeBranch(srcBlock, copyOp); + srcBlock.Append(copyOp); } block.Operations.Remove(node); @@ -41,34 +41,5 @@ namespace Ryujinx.Graphics.Shader.StructuredIr } } } - - private static void AddBeforeBranch(BasicBlock block, INode node) - { - INode lastOp = block.GetLastOp(); - - if (lastOp is Operation operation && IsControlFlowInst(operation.Inst)) - { - block.Operations.AddBefore(block.Operations.Last, node); - } - else - { - block.Operations.AddLast(node); - } - } - - private static bool IsControlFlowInst(Instruction inst) - { - switch (inst) - { - case Instruction.Branch: - case Instruction.BranchIfFalse: - case 
Instruction.BranchIfTrue: - case Instruction.Discard: - case Instruction.Return: - return true; - } - - return false; - } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs index ce79f3b8e..7e15384b2 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs @@ -89,6 +89,18 @@ namespace Ryujinx.Graphics.Shader.StructuredIr { Instruction inst = operation.Inst; + if (inst == Instruction.LoadAttribute) + { + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + if (src1.Type == OperandType.Constant && src2.Type == OperandType.Constant) + { + int attrOffset = src1.Value + (src2.Value << 2); + context.Info.Inputs.Add(attrOffset); + } + } + int sourcesCount = operation.SourcesCount; int outDestsCount = operation.DestsCount != 0 ? operation.DestsCount - 1 : 0; @@ -232,6 +244,10 @@ namespace Ryujinx.Graphics.Shader.StructuredIr case Instruction.SwizzleAdd: context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd; break; + case Instruction.FSIBegin: + case Instruction.FSIEnd: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.FSI; + break; } } diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs index 2a39d0210..1eac8a909 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs @@ -2,6 +2,7 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.Translation; using System.Collections.Generic; using System.Linq; +using System.Numerics; using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper; @@ -35,6 +36,40 @@ namespace Ryujinx.Graphics.Shader.StructuredIr Info = new StructuredProgramInfo(); Config = config; + + if 
(config.GpPassthrough) + { + int passthroughAttributes = config.PassthroughAttributes; + while (passthroughAttributes != 0) + { + int index = BitOperations.TrailingZeroCount(passthroughAttributes); + + int attrBase = AttributeConsts.UserAttributeBase + index * 16; + Info.Inputs.Add(attrBase); + Info.Inputs.Add(attrBase + 4); + Info.Inputs.Add(attrBase + 8); + Info.Inputs.Add(attrBase + 12); + + passthroughAttributes &= ~(1 << index); + } + + Info.Inputs.Add(AttributeConsts.PositionX); + Info.Inputs.Add(AttributeConsts.PositionY); + Info.Inputs.Add(AttributeConsts.PositionZ); + Info.Inputs.Add(AttributeConsts.PositionW); + Info.Inputs.Add(AttributeConsts.PointSize); + + for (int i = 0; i < 8; i++) + { + Info.Inputs.Add(AttributeConsts.ClipDistance0 + i * 4); + } + } + else if (config.Stage == ShaderStage.Fragment) + { + // Potentially used for texture coordinate scaling. + Info.Inputs.Add(AttributeConsts.PositionX); + Info.Inputs.Add(AttributeConsts.PositionY); + } } public void EnterFunction( @@ -277,6 +312,15 @@ namespace Ryujinx.Graphics.Shader.StructuredIr public AstOperand GetOperandDef(Operand operand) { + if (operand.Type == OperandType.Attribute) + { + Info.Outputs.Add(operand.Value & AttributeConsts.Mask); + } + else if (operand.Type == OperandType.AttributePerPatch) + { + Info.OutputsPerPatch.Add(operand.Value & AttributeConsts.Mask); + } + return GetOperand(operand); } @@ -288,6 +332,15 @@ namespace Ryujinx.Graphics.Shader.StructuredIr return GetOperandDef(operand); } + if (operand.Type == OperandType.Attribute) + { + Info.Inputs.Add(operand.Value); + } + else if (operand.Type == OperandType.AttributePerPatch) + { + Info.InputsPerPatch.Add(operand.Value); + } + return GetOperand(operand); } diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs index 2dc239643..43bdfaba5 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs +++ 
b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs @@ -22,6 +22,11 @@ namespace Ryujinx.Graphics.Shader.StructuredIr { public List Functions { get; } + public HashSet Inputs { get; } + public HashSet Outputs { get; } + public HashSet InputsPerPatch { get; } + public HashSet OutputsPerPatch { get; } + public HelperFunctionsMask HelperFunctionsMask { get; set; } public TransformFeedbackOutput[] TransformFeedbackOutputs { get; } @@ -30,6 +35,11 @@ namespace Ryujinx.Graphics.Shader.StructuredIr { Functions = new List(); + Inputs = new HashSet(); + Outputs = new HashSet(); + InputsPerPatch = new HashSet(); + OutputsPerPatch = new HashSet(); + TransformFeedbackOutputs = new TransformFeedbackOutput[0xc0]; } } diff --git a/Ryujinx.Graphics.Shader/SupportBuffer.cs b/Ryujinx.Graphics.Shader/SupportBuffer.cs index 28a48c2ad..5fe993278 100644 --- a/Ryujinx.Graphics.Shader/SupportBuffer.cs +++ b/Ryujinx.Graphics.Shader/SupportBuffer.cs @@ -24,8 +24,8 @@ namespace Ryujinx.Graphics.Shader public static int ComputeRenderScaleOffset; public const int FragmentIsBgraCount = 8; - // One for the render target, 32 for the textures, and 8 for the images. - public const int RenderScaleMaxCount = 1 + 32 + 8; + // One for the render target, 64 for the textures, and 8 for the images. + public const int RenderScaleMaxCount = 1 + 64 + 8; private static int OffsetOf(ref SupportBuffer storage, ref T target) { @@ -52,7 +52,7 @@ namespace Ryujinx.Graphics.Shader public Vector4 ViewportInverse; public Vector4 FragmentRenderScaleCount; - // Render scale max count: 1 + 32 + 8. First scale is fragment output scale, others are textures/image inputs. - public Array41> RenderScale; + // Render scale max count: 1 + 64 + 8. First scale is fragment output scale, others are textures/image inputs. 
+ public Array73> RenderScale; } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/TextureHandle.cs b/Ryujinx.Graphics.Shader/TextureHandle.cs index b3712e6bf..d468188b8 100644 --- a/Ryujinx.Graphics.Shader/TextureHandle.cs +++ b/Ryujinx.Graphics.Shader/TextureHandle.cs @@ -1,3 +1,4 @@ +using System; using System.Runtime.CompilerServices; namespace Ryujinx.Graphics.Shader @@ -50,5 +51,63 @@ namespace Ryujinx.Graphics.Shader { return (handle & 0x3fff, (handle >> 14) & 0x3fff, (TextureHandleType)((uint)handle >> 28)); } + + /// + /// Unpacks the texture ID from the real texture handle. + /// + /// The real texture handle + /// The texture ID + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int UnpackTextureId(int packedId) + { + return (packedId >> 0) & 0xfffff; + } + + /// + /// Unpacks the sampler ID from the real texture handle. + /// + /// The real texture handle + /// The sampler ID + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int UnpackSamplerId(int packedId) + { + return (packedId >> 20) & 0xfff; + } + + /// + /// Reads a packed texture and sampler ID (basically, the real texture handle) + /// from a given texture/sampler constant buffer. + /// + /// A word offset of the handle on the buffer (the "fake" shader handle) + /// The constant buffer to fetch texture IDs from + /// The constant buffer to fetch sampler IDs from + /// The packed texture and sampler ID (the real texture handle) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int ReadPackedId(int wordOffset, ReadOnlySpan cachedTextureBuffer, ReadOnlySpan cachedSamplerBuffer) + { + (int textureWordOffset, int samplerWordOffset, TextureHandleType handleType) = UnpackOffsets(wordOffset); + + int handle = cachedTextureBuffer[textureWordOffset]; + + // The "wordOffset" (which is really the immediate value used on texture instructions on the shader) + // is a 13-bit value. 
However, in order to also support separate samplers and textures (which uses + // bindless textures on the shader), we extend it with another value on the higher 16 bits with + // another offset for the sampler. + // The shader translator has code to detect separate texture and sampler uses with a bindless texture, + // turn that into a regular texture access and produce those special handles with values on the higher 16 bits. + if (handleType != TextureHandleType.CombinedSampler) + { + int samplerHandle = cachedSamplerBuffer[samplerWordOffset]; + + if (handleType == TextureHandleType.SeparateSamplerId) + { + samplerHandle <<= 20; + } + + handle |= samplerHandle; + } + + return handle; + } } } diff --git a/Ryujinx.Graphics.Shader/Translation/AggregateType.cs b/Ryujinx.Graphics.Shader/Translation/AggregateType.cs new file mode 100644 index 000000000..dcd1e0bd4 --- /dev/null +++ b/Ryujinx.Graphics.Shader/Translation/AggregateType.cs @@ -0,0 +1,18 @@ +namespace Ryujinx.Graphics.Shader.Translation +{ + enum AggregateType + { + Invalid, + Void, + Bool, + FP32, + FP64, + S32, + U32, + + ElementTypeMask = 0xff, + + Vector = 1 << 8, + Array = 1 << 9 + } +} diff --git a/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs b/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs index ada60ab97..0c3ab08e2 100644 --- a/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs +++ b/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs @@ -9,6 +9,7 @@ namespace Ryujinx.Graphics.Shader.Translation public const int TessLevelInner0 = 0x010; public const int TessLevelInner1 = 0x014; public const int Layer = 0x064; + public const int ViewportIndex = 0x068; public const int PointSize = 0x06c; public const int PositionX = 0x070; public const int PositionY = 0x074; diff --git a/Ryujinx.Graphics.Shader/Translation/AttributeInfo.cs b/Ryujinx.Graphics.Shader/Translation/AttributeInfo.cs new file mode 100644 index 000000000..22b9ba9b0 --- /dev/null +++ 
b/Ryujinx.Graphics.Shader/Translation/AttributeInfo.cs @@ -0,0 +1,138 @@ +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Shader.Translation +{ + struct AttributeInfo + { + private static readonly Dictionary BuiltInAttributes = new Dictionary() + { + { AttributeConsts.TessLevelOuter0, new AttributeInfo(AttributeConsts.TessLevelOuter0, 0, 4, AggregateType.Array | AggregateType.FP32) }, + { AttributeConsts.TessLevelOuter1, new AttributeInfo(AttributeConsts.TessLevelOuter0, 1, 4, AggregateType.Array | AggregateType.FP32) }, + { AttributeConsts.TessLevelOuter2, new AttributeInfo(AttributeConsts.TessLevelOuter0, 2, 4, AggregateType.Array | AggregateType.FP32) }, + { AttributeConsts.TessLevelOuter3, new AttributeInfo(AttributeConsts.TessLevelOuter0, 3, 4, AggregateType.Array | AggregateType.FP32) }, + { AttributeConsts.TessLevelInner0, new AttributeInfo(AttributeConsts.TessLevelInner0, 0, 2, AggregateType.Array | AggregateType.FP32) }, + { AttributeConsts.TessLevelInner1, new AttributeInfo(AttributeConsts.TessLevelInner0, 1, 2, AggregateType.Array | AggregateType.FP32) }, + { AttributeConsts.Layer, new AttributeInfo(AttributeConsts.Layer, 0, 1, AggregateType.S32) }, + { AttributeConsts.ViewportIndex, new AttributeInfo(AttributeConsts.ViewportIndex, 0, 1, AggregateType.S32) }, + { AttributeConsts.PointSize, new AttributeInfo(AttributeConsts.PointSize, 0, 1, AggregateType.FP32) }, + { AttributeConsts.PositionX, new AttributeInfo(AttributeConsts.PositionX, 0, 4, AggregateType.Vector | AggregateType.FP32) }, + { AttributeConsts.PositionY, new AttributeInfo(AttributeConsts.PositionX, 1, 4, AggregateType.Vector | AggregateType.FP32) }, + { AttributeConsts.PositionZ, new AttributeInfo(AttributeConsts.PositionX, 2, 4, AggregateType.Vector | AggregateType.FP32) }, + { AttributeConsts.PositionW, new AttributeInfo(AttributeConsts.PositionX, 3, 4, AggregateType.Vector | AggregateType.FP32) }, + { AttributeConsts.ClipDistance0, new 
AttributeInfo(AttributeConsts.ClipDistance0, 0, 8, AggregateType.Array | AggregateType.FP32) }, + { AttributeConsts.ClipDistance1, new AttributeInfo(AttributeConsts.ClipDistance0, 1, 8, AggregateType.Array | AggregateType.FP32) }, + { AttributeConsts.ClipDistance2, new AttributeInfo(AttributeConsts.ClipDistance0, 2, 8, AggregateType.Array | AggregateType.FP32) }, + { AttributeConsts.ClipDistance3, new AttributeInfo(AttributeConsts.ClipDistance0, 3, 8, AggregateType.Array | AggregateType.FP32) }, + { AttributeConsts.ClipDistance4, new AttributeInfo(AttributeConsts.ClipDistance0, 4, 8, AggregateType.Array | AggregateType.FP32) }, + { AttributeConsts.ClipDistance5, new AttributeInfo(AttributeConsts.ClipDistance0, 5, 8, AggregateType.Array | AggregateType.FP32) }, + { AttributeConsts.ClipDistance6, new AttributeInfo(AttributeConsts.ClipDistance0, 6, 8, AggregateType.Array | AggregateType.FP32) }, + { AttributeConsts.ClipDistance7, new AttributeInfo(AttributeConsts.ClipDistance0, 7, 8, AggregateType.Array | AggregateType.FP32) }, + { AttributeConsts.PointCoordX, new AttributeInfo(AttributeConsts.PointCoordX, 0, 2, AggregateType.Vector | AggregateType.FP32) }, + { AttributeConsts.PointCoordY, new AttributeInfo(AttributeConsts.PointCoordX, 1, 2, AggregateType.Vector | AggregateType.FP32) }, + { AttributeConsts.TessCoordX, new AttributeInfo(AttributeConsts.TessCoordX, 0, 2, AggregateType.Vector | AggregateType.FP32) }, + { AttributeConsts.TessCoordY, new AttributeInfo(AttributeConsts.TessCoordX, 1, 2, AggregateType.Vector | AggregateType.FP32) }, + { AttributeConsts.InstanceId, new AttributeInfo(AttributeConsts.InstanceId, 0, 1, AggregateType.S32) }, + { AttributeConsts.VertexId, new AttributeInfo(AttributeConsts.VertexId, 0, 1, AggregateType.S32) }, + { AttributeConsts.FrontFacing, new AttributeInfo(AttributeConsts.FrontFacing, 0, 1, AggregateType.Bool) }, + + // Special. 
+ { AttributeConsts.FragmentOutputDepth, new AttributeInfo(AttributeConsts.FragmentOutputDepth, 0, 1, AggregateType.FP32) }, + { AttributeConsts.ThreadKill, new AttributeInfo(AttributeConsts.ThreadKill, 0, 1, AggregateType.Bool) }, + { AttributeConsts.ThreadIdX, new AttributeInfo(AttributeConsts.ThreadIdX, 0, 3, AggregateType.Vector | AggregateType.U32) }, + { AttributeConsts.ThreadIdY, new AttributeInfo(AttributeConsts.ThreadIdX, 1, 3, AggregateType.Vector | AggregateType.U32) }, + { AttributeConsts.ThreadIdZ, new AttributeInfo(AttributeConsts.ThreadIdX, 2, 3, AggregateType.Vector | AggregateType.U32) }, + { AttributeConsts.CtaIdX, new AttributeInfo(AttributeConsts.CtaIdX, 0, 3, AggregateType.Vector | AggregateType.U32) }, + { AttributeConsts.CtaIdY, new AttributeInfo(AttributeConsts.CtaIdX, 1, 3, AggregateType.Vector | AggregateType.U32) }, + { AttributeConsts.CtaIdZ, new AttributeInfo(AttributeConsts.CtaIdX, 2, 3, AggregateType.Vector | AggregateType.U32) }, + { AttributeConsts.LaneId, new AttributeInfo(AttributeConsts.LaneId, 0, 1, AggregateType.U32) }, + { AttributeConsts.InvocationId, new AttributeInfo(AttributeConsts.InvocationId, 0, 1, AggregateType.S32) }, + { AttributeConsts.PrimitiveId, new AttributeInfo(AttributeConsts.PrimitiveId, 0, 1, AggregateType.S32) }, + { AttributeConsts.PatchVerticesIn, new AttributeInfo(AttributeConsts.PatchVerticesIn, 0, 1, AggregateType.S32) }, + { AttributeConsts.EqMask, new AttributeInfo(AttributeConsts.EqMask, 0, 4, AggregateType.Vector | AggregateType.U32) }, + { AttributeConsts.GeMask, new AttributeInfo(AttributeConsts.GeMask, 0, 4, AggregateType.Vector | AggregateType.U32) }, + { AttributeConsts.GtMask, new AttributeInfo(AttributeConsts.GtMask, 0, 4, AggregateType.Vector | AggregateType.U32) }, + { AttributeConsts.LeMask, new AttributeInfo(AttributeConsts.LeMask, 0, 4, AggregateType.Vector | AggregateType.U32) }, + { AttributeConsts.LtMask, new AttributeInfo(AttributeConsts.LtMask, 0, 4, AggregateType.Vector | 
AggregateType.U32) }, + }; + + public int BaseValue { get; } + public int Value { get; } + public int Length { get; } + public AggregateType Type { get; } + public bool IsBuiltin { get; } + public bool IsValid => Type != AggregateType.Invalid; + + public AttributeInfo(int baseValue, int index, int length, AggregateType type, bool isBuiltin = true) + { + BaseValue = baseValue; + Value = baseValue + index * 4; + Length = length; + Type = type; + IsBuiltin = isBuiltin; + } + + public int GetInnermostIndex() + { + return (Value - BaseValue) / 4; + } + + public static bool Validate(ShaderConfig config, int value, bool isOutAttr) + { + return From(config, value, isOutAttr).IsValid; + } + + public static AttributeInfo From(ShaderConfig config, int value, bool isOutAttr) + { + value &= ~3; + + if (value >= AttributeConsts.UserAttributeBase && value < AttributeConsts.UserAttributeEnd) + { + int location = (value - AttributeConsts.UserAttributeBase) / 16; + + AggregateType elemType; + + if (config.Stage == ShaderStage.Vertex && !isOutAttr) + { + elemType = config.GpuAccessor.QueryAttributeType(location) switch + { + AttributeType.Sint => AggregateType.S32, + AttributeType.Uint => AggregateType.U32, + _ => AggregateType.FP32 + }; + } + else + { + elemType = AggregateType.FP32; + } + + return new AttributeInfo(value & ~0xf, (value >> 2) & 3, 4, AggregateType.Vector | elemType, false); + } + else if (value >= AttributeConsts.FragmentOutputColorBase && value < AttributeConsts.FragmentOutputColorEnd) + { + return new AttributeInfo(value & ~0xf, (value >> 2) & 3, 4, AggregateType.Vector | AggregateType.FP32, false); + } + else if (value == AttributeConsts.SupportBlockViewInverseX || value == AttributeConsts.SupportBlockViewInverseY) + { + return new AttributeInfo(value, 0, 1, AggregateType.FP32); + } + else if (BuiltInAttributes.TryGetValue(value, out AttributeInfo info)) + { + return info; + } + + return new AttributeInfo(value, 0, 0, AggregateType.Invalid); + } + + public static 
bool IsArrayBuiltIn(int attr) + { + if (attr <= AttributeConsts.TessLevelInner1 || + attr == AttributeConsts.TessCoordX || + attr == AttributeConsts.TessCoordY) + { + return false; + } + + return (attr & AttributeConsts.SpecialMask) == 0; + } + } +} diff --git a/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs b/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs index ba3b551d9..fb46e57c9 100644 --- a/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs +++ b/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs @@ -2,6 +2,7 @@ using Ryujinx.Graphics.Shader.Decoders; using Ryujinx.Graphics.Shader.IntermediateRepresentation; using System.Collections.Generic; using System.Diagnostics; +using System.Numerics; using System.Runtime.CompilerServices; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; @@ -30,6 +31,19 @@ namespace Ryujinx.Graphics.Shader.Translation IsNonMain = isNonMain; _operations = new List(); _labels = new Dictionary(); + + EmitStart(); + } + + private void EmitStart() + { + if (Config.Stage == ShaderStage.Vertex && + Config.Options.TargetApi == TargetApi.Vulkan && + (Config.Options.Flags & TranslationFlags.VertexA) == 0) + { + // Vulkan requires the point size to be always written on the shader if the primitive topology is points. 
+ this.Copy(Attribute(AttributeConsts.PointSize), ConstF(Config.GpuAccessor.QueryPointSize())); + } } public T GetOp() where T : unmanaged @@ -43,7 +57,7 @@ namespace Ryujinx.Graphics.Shader.Translation { Operation operation = new Operation(inst, dest, sources); - Add(operation); + _operations.Add(operation); return dest; } @@ -167,6 +181,15 @@ namespace Ryujinx.Graphics.Shader.Translation this.Copy(Attribute(AttributeConsts.PositionX), this.FPFusedMultiplyAdd(x, xScale, negativeOne)); this.Copy(Attribute(AttributeConsts.PositionY), this.FPFusedMultiplyAdd(y, yScale, negativeOne)); } + + if (Config.Options.TargetApi == TargetApi.Vulkan && Config.GpuAccessor.QueryTransformDepthMinusOneToOne()) + { + Operand z = Attribute(AttributeConsts.PositionZ | AttributeConsts.LoadOutputMask); + Operand w = Attribute(AttributeConsts.PositionW | AttributeConsts.LoadOutputMask); + Operand halfW = this.FPMultiply(w, ConstF(0.5f)); + + this.Copy(Attribute(AttributeConsts.PositionZ), this.FPFusedMultiplyAdd(z, ConstF(0.5f), halfW)); + } } public void PrepareForVertexReturn(out Operand oldXLocal, out Operand oldYLocal, out Operand oldZLocal) @@ -184,8 +207,15 @@ namespace Ryujinx.Graphics.Shader.Translation oldYLocal = null; } - // Will be used by Vulkan backend for depth mode emulation. 
- oldZLocal = null; + if (Config.Options.TargetApi == TargetApi.Vulkan && Config.GpuAccessor.QueryTransformDepthMinusOneToOne()) + { + oldZLocal = Local(); + this.Copy(oldZLocal, Attribute(AttributeConsts.PositionZ | AttributeConsts.LoadOutputMask)); + } + else + { + oldZLocal = null; + } PrepareForVertexReturn(); } @@ -203,8 +233,48 @@ namespace Ryujinx.Graphics.Shader.Translation { PrepareForVertexReturn(); } + else if (Config.Stage == ShaderStage.Geometry) + { + void WriteOutput(int index, int primIndex) + { + Operand x = this.LoadAttribute(Const(index), Const(0), Const(primIndex)); + Operand y = this.LoadAttribute(Const(index + 4), Const(0), Const(primIndex)); + Operand z = this.LoadAttribute(Const(index + 8), Const(0), Const(primIndex)); + Operand w = this.LoadAttribute(Const(index + 12), Const(0), Const(primIndex)); + + this.Copy(Attribute(index), x); + this.Copy(Attribute(index + 4), y); + this.Copy(Attribute(index + 8), z); + this.Copy(Attribute(index + 12), w); + } + + if (Config.GpPassthrough) + { + int inputVertices = Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices(); + + for (int primIndex = 0; primIndex < inputVertices; primIndex++) + { + WriteOutput(AttributeConsts.PositionX, primIndex); + + int passthroughAttributes = Config.PassthroughAttributes; + while (passthroughAttributes != 0) + { + int index = BitOperations.TrailingZeroCount(passthroughAttributes); + WriteOutput(AttributeConsts.UserAttributeBase + index * 16, primIndex); + Config.SetOutputUserAttribute(index, perPatch: false); + passthroughAttributes &= ~(1 << index); + } + + this.EmitVertex(); + } + + this.EndPrimitive(); + } + } else if (Config.Stage == ShaderStage.Fragment) { + bool supportsBgra = Config.GpuAccessor.QueryHostSupportsBgraFormat(); + if (Config.OmapDepth) { Operand dest = Attribute(AttributeConsts.FragmentOutputDepth); @@ -214,7 +284,40 @@ namespace Ryujinx.Graphics.Shader.Translation this.Copy(dest, src); } - bool supportsBgra = 
Config.GpuAccessor.QueryHostSupportsBgraFormat(); + AlphaTestOp alphaTestOp = Config.GpuAccessor.QueryAlphaTestCompare(); + + if (alphaTestOp != AlphaTestOp.Always && (Config.OmapTargets & 8) != 0) + { + if (alphaTestOp == AlphaTestOp.Never) + { + this.Discard(); + } + else + { + Instruction comparator = alphaTestOp switch + { + AlphaTestOp.Equal => Instruction.CompareEqual, + AlphaTestOp.Greater => Instruction.CompareGreater, + AlphaTestOp.GreaterOrEqual => Instruction.CompareGreaterOrEqual, + AlphaTestOp.Less => Instruction.CompareLess, + AlphaTestOp.LessOrEqual => Instruction.CompareLessOrEqual, + AlphaTestOp.NotEqual => Instruction.CompareNotEqual, + _ => 0 + }; + + Debug.Assert(comparator != 0, $"Invalid alpha test operation \"{alphaTestOp}\"."); + + Operand alpha = Register(3, RegisterType.Gpr); + Operand alphaRef = ConstF(Config.GpuAccessor.QueryAlphaTestReference()); + Operand alphaPass = Add(Instruction.FP32 | comparator, Local(), alpha, alphaRef); + Operand alphaPassLabel = Label(); + + this.BranchIfTrue(alphaPassLabel, alphaPass); + this.Discard(); + this.MarkLabel(alphaPassLabel); + } + } + int regIndexBase = 0; for (int rtIndex = 0; rtIndex < 8; rtIndex++) diff --git a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs index 27d72cd53..60c3a0b4c 100644 --- a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -1,4 +1,5 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; using System; using System.Collections.Generic; using System.Linq; @@ -14,6 +15,7 @@ namespace Ryujinx.Graphics.Shader.Translation public ShaderStage Stage { get; } public bool GpPassthrough { get; } + public bool GpPassthroughWithHostSupport => GpPassthrough && GpuAccessor.QueryHostSupportsGeometryShaderPassthrough(); public bool LastInVertexPipeline { get; private set; } public int ThreadsPerInputPrimitive { get; } @@ 
-587,7 +589,7 @@ namespace Ryujinx.Graphics.Shader.Translation return _cachedImageDescriptors ??= GetTextureOrImageDescriptors(_usedImages, GpuAccessor.QueryBindingImage); } - private static TextureDescriptor[] GetTextureOrImageDescriptors(Dictionary dict, Func getBindingCallback) + private static TextureDescriptor[] GetTextureOrImageDescriptors(Dictionary dict, Func getBindingCallback) { var descriptors = new TextureDescriptor[dict.Count]; @@ -597,7 +599,8 @@ namespace Ryujinx.Graphics.Shader.Translation var info = kv.Key; var meta = kv.Value; - int binding = getBindingCallback(i); + bool isBuffer = (meta.Type & SamplerType.Mask) == SamplerType.TextureBuffer; + int binding = getBindingCallback(i, isBuffer); descriptors[i] = new TextureDescriptor(binding, meta.Type, info.Format, info.CbufSlot, info.Handle); descriptors[i].SetFlag(meta.UsageFlags); @@ -606,5 +609,52 @@ namespace Ryujinx.Graphics.Shader.Translation return descriptors; } + + public (TextureDescriptor, int) FindTextureDescriptor(AstTextureOperation texOp) + { + TextureDescriptor[] descriptors = GetTextureDescriptors(); + + for (int i = 0; i < descriptors.Length; i++) + { + var descriptor = descriptors[i]; + + if (descriptor.CbufSlot == texOp.CbufSlot && + descriptor.HandleIndex == texOp.Handle && + descriptor.Format == texOp.Format) + { + return (descriptor, i); + } + } + + return (default, -1); + } + + private static int FindDescriptorIndex(TextureDescriptor[] array, AstTextureOperation texOp) + { + for (int i = 0; i < array.Length; i++) + { + var descriptor = array[i]; + + if (descriptor.Type == texOp.Type && + descriptor.CbufSlot == texOp.CbufSlot && + descriptor.HandleIndex == texOp.Handle && + descriptor.Format == texOp.Format) + { + return i; + } + } + + return -1; + } + + public int FindTextureDescriptorIndex(AstTextureOperation texOp) + { + return FindDescriptorIndex(GetTextureDescriptors(), texOp); + } + + public int FindImageDescriptorIndex(AstTextureOperation texOp) + { + return 
FindDescriptorIndex(GetImageDescriptors(), texOp); + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Translation/Ssa.cs b/Ryujinx.Graphics.Shader/Translation/Ssa.cs index ff0fa2b7f..8c63d72dd 100644 --- a/Ryujinx.Graphics.Shader/Translation/Ssa.cs +++ b/Ryujinx.Graphics.Shader/Translation/Ssa.cs @@ -63,6 +63,51 @@ namespace Ryujinx.Graphics.Shader.Translation } } + private class LocalDefMap + { + private Operand[] _map; + private int[] _uses; + public int UseCount { get; private set; } + + public LocalDefMap() + { + _map = new Operand[RegisterConsts.TotalCount]; + _uses = new int[RegisterConsts.TotalCount]; + } + + public Operand Get(int key) + { + return _map[key]; + } + + public void Add(int key, Operand operand) + { + if (_map[key] == null) + { + _uses[UseCount++] = key; + } + + _map[key] = operand; + } + + public Operand GetUse(int index, out int key) + { + key = _uses[index]; + + return _map[key]; + } + + public void Clear() + { + for (int i = 0; i < UseCount; i++) + { + _map[_uses[i]] = null; + } + + UseCount = 0; + } + } + private struct Definition { public BasicBlock Block { get; } @@ -78,6 +123,7 @@ namespace Ryujinx.Graphics.Shader.Translation public static void Rename(BasicBlock[] blocks) { DefMap[] globalDefs = new DefMap[blocks.Length]; + LocalDefMap localDefs = new LocalDefMap(); for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) { @@ -89,13 +135,11 @@ namespace Ryujinx.Graphics.Shader.Translation // First pass, get all defs and locals uses. for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) { - Operand[] localDefs = new Operand[RegisterConsts.TotalCount]; - Operand RenameLocal(Operand operand) { if (operand != null && operand.Type == OperandType.Register) { - Operand local = localDefs[GetKeyFromRegister(operand.GetRegister())]; + Operand local = localDefs.Get(GetKeyFromRegister(operand.GetRegister())); operand = local ?? 
operand; } @@ -124,7 +168,7 @@ namespace Ryujinx.Graphics.Shader.Translation { Operand local = Local(); - localDefs[GetKeyFromRegister(dest.GetRegister())] = local; + localDefs.Add(GetKeyFromRegister(dest.GetRegister()), local); operation.SetDest(index, local); } @@ -134,16 +178,12 @@ namespace Ryujinx.Graphics.Shader.Translation node = node.Next; } - for (int index = 0; index < RegisterConsts.TotalCount; index++) + int localUses = localDefs.UseCount; + for (int index = 0; index < localUses; index++) { - Operand local = localDefs[index]; + Operand local = localDefs.GetUse(index, out int key); - if (local == null) - { - continue; - } - - Register reg = GetRegisterFromKey(index); + Register reg = GetRegisterFromKey(key); globalDefs[block.Index].TryAddOperand(reg, local); @@ -160,13 +200,13 @@ namespace Ryujinx.Graphics.Shader.Translation } } } + + localDefs.Clear(); } // Second pass, rename variables with definitions on different blocks. for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) { - Operand[] localDefs = new Operand[RegisterConsts.TotalCount]; - BasicBlock block = blocks[blkIndex]; Operand RenameGlobal(Operand operand) @@ -175,7 +215,7 @@ namespace Ryujinx.Graphics.Shader.Translation { int key = GetKeyFromRegister(operand.GetRegister()); - Operand local = localDefs[key]; + Operand local = localDefs.Get(key); if (local != null) { @@ -184,7 +224,7 @@ namespace Ryujinx.Graphics.Shader.Translation operand = FindDefinitionForCurr(globalDefs, block, operand.GetRegister()); - localDefs[key] = operand; + localDefs.Add(key, operand); } return operand; @@ -200,6 +240,11 @@ namespace Ryujinx.Graphics.Shader.Translation } } } + + if (blkIndex < blocks.Length - 1) + { + localDefs.Clear(); + } } } diff --git a/Ryujinx.Graphics.Shader/Translation/Translator.cs b/Ryujinx.Graphics.Shader/Translation/Translator.cs index e1614e660..7bddf4590 100644 --- a/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -1,4 
+1,5 @@ using Ryujinx.Graphics.Shader.CodeGen.Glsl; +using Ryujinx.Graphics.Shader.CodeGen.Spirv; using Ryujinx.Graphics.Shader.Decoders; using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.StructuredIr; @@ -72,16 +73,15 @@ namespace Ryujinx.Graphics.Shader.Translation Ssa.Rename(cfg.Blocks); Optimizer.RunPass(cfg.Blocks, config); - Rewriter.RunPass(cfg.Blocks, config); } funcs[i] = new Function(cfg.Blocks, $"fun{i}", false, inArgumentsCount, outArgumentsCount); } - StructuredProgramInfo sInfo = StructuredProgram.MakeStructuredProgram(funcs, config); + var sInfo = StructuredProgram.MakeStructuredProgram(funcs, config); - ShaderProgramInfo info = new ShaderProgramInfo( + var info = new ShaderProgramInfo( config.GetConstantBufferDescriptors(), config.GetStorageBufferDescriptors(), config.GetTextureDescriptors(), @@ -95,6 +95,7 @@ namespace Ryujinx.Graphics.Shader.Translation return config.Options.TargetLanguage switch { TargetLanguage.Glsl => new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config)), + TargetLanguage.Spirv => new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, config)), _ => throw new NotImplementedException(config.Options.TargetLanguage.ToString()) }; } @@ -105,7 +106,7 @@ namespace Ryujinx.Graphics.Shader.Translation DecodedProgram program; ulong maxEndAddress = 0; - if ((options.Flags & TranslationFlags.Compute) != 0) + if (options.Flags.HasFlag(TranslationFlags.Compute)) { config = new ShaderConfig(gpuAccessor, options); diff --git a/Ryujinx.Graphics.Texture/Astc/AstcDecoder.cs b/Ryujinx.Graphics.Texture/Astc/AstcDecoder.cs index 238f46a07..08738583e 100644 --- a/Ryujinx.Graphics.Texture/Astc/AstcDecoder.cs +++ b/Ryujinx.Graphics.Texture/Astc/AstcDecoder.cs @@ -291,7 +291,7 @@ namespace Ryujinx.Graphics.Texture.Astc int depth, int levels, int layers, - out Span decoded) + out byte[] decoded) { byte[] output = new byte[QueryDecompressedSize(width, height, depth, 
levels, layers)]; diff --git a/Ryujinx.Graphics.Texture/BC6Decoder.cs b/Ryujinx.Graphics.Texture/BC6Decoder.cs new file mode 100644 index 000000000..819bf022f --- /dev/null +++ b/Ryujinx.Graphics.Texture/BC6Decoder.cs @@ -0,0 +1,819 @@ +using Ryujinx.Graphics.Texture.Utils; +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Texture +{ + static class BC6Decoder + { + private const int HalfOne = 0x3C00; + + public static void Decode(Span output, ReadOnlySpan data, int width, int height, bool signed) + { + ReadOnlySpan blocks = MemoryMarshal.Cast(data); + + Span output64 = MemoryMarshal.Cast(output); + + int wInBlocks = (width + 3) / 4; + int hInBlocks = (height + 3) / 4; + + for (int y = 0; y < hInBlocks; y++) + { + int y2 = y * 4; + int bh = Math.Min(4, height - y2); + + for (int x = 0; x < wInBlocks; x++) + { + int x2 = x * 4; + int bw = Math.Min(4, width - x2); + + DecodeBlock(blocks[y * wInBlocks + x], output64.Slice(y2 * width + x2), bw, bh, width, signed); + } + } + } + + private static void DecodeBlock(Block block, Span output, int w, int h, int width, bool signed) + { + int mode = (int)(block.Low & 3); + if ((mode & 2) != 0) + { + mode = (int)(block.Low & 0x1f); + } + + Span endPoints = stackalloc RgbaColor32[4]; + int subsetCount = DecodeEndPoints(ref block, endPoints, mode, signed); + if (subsetCount == 0) + { + // Mode is invalid, the spec mandates that hardware fills the block with + // a opaque black color. 
+ for (int ty = 0; ty < h; ty++) + { + int baseOffs = ty * width; + + for (int tx = 0; tx < w; tx++) + { + output[baseOffs + tx] = (ulong)HalfOne << 48; + } + } + + return; + } + + int partition; + int indexBitCount; + ulong indices; + + if (subsetCount > 1) + { + partition = (int)((block.High >> 13) & 0x1F); + indexBitCount = 3; + + int fixUpIndex = BC67Tables.FixUpIndices[subsetCount - 1][partition][1] * 3; + ulong lowMask = (ulong.MaxValue >> (65 - fixUpIndex)) << 3; + ulong highMask = ulong.MaxValue << (fixUpIndex + 3); + + indices = ((block.High >> 16) & highMask) | ((block.High >> 17) & lowMask) | ((block.High >> 18) & 3); + } + else + { + partition = 0; + indexBitCount = 4; + indices = (block.High & ~0xFUL) | ((block.High >> 1) & 7); + } + + ulong indexMask = (1UL << indexBitCount) - 1; + + for (int ty = 0; ty < h; ty++) + { + int baseOffs = ty * width; + + for (int tx = 0; tx < w; tx++) + { + int offs = baseOffs + tx; + int index = (int)(indices & indexMask); + int endPointBase = BC67Tables.PartitionTable[subsetCount - 1][partition][ty * 4 + tx] << 1; + + RgbaColor32 color1 = endPoints[endPointBase]; + RgbaColor32 color2 = endPoints[endPointBase + 1]; + + RgbaColor32 color = BC67Utils.Interpolate(color1, color2, index, indexBitCount); + + output[offs] = + (ulong)FinishUnquantize(color.R, signed) | + ((ulong)FinishUnquantize(color.G, signed) << 16) | + ((ulong)FinishUnquantize(color.B, signed) << 32) | + ((ulong)HalfOne << 48); + + indices >>= indexBitCount; + } + } + } + + private static int DecodeEndPoints(ref Block block, Span endPoints, int mode, bool signed) + { + ulong low = block.Low; + ulong high = block.High; + + int r0 = 0, g0 = 0, b0 = 0, r1 = 0, g1 = 0, b1 = 0, r2 = 0, g2 = 0, b2 = 0, r3 = 0, g3 = 0, b3 = 0; + int subsetCount; + + switch (mode) + { + case 0: + r0 = (int)(low >> 5) & 0x3FF; + g0 = (int)(low >> 15) & 0x3FF; + b0 = (int)(low >> 25) & 0x3FF; + + if (signed) + { + r0 = SignExtend(r0, 10); + g0 = SignExtend(g0, 10); + b0 = 
SignExtend(b0, 10); + } + + r1 = r0 + SignExtend((int)(low >> 35), 5); + g1 = g0 + SignExtend((int)(low >> 45), 5); + b1 = b0 + SignExtend((int)(low >> 55), 5); + + r2 = r0 + SignExtend((int)(high >> 1), 5); + g2 = g0 + SignExtend((int)(((low << 2) & 0x10) | ((low >> 41) & 0xF)), 5); + b2 = b0 + SignExtend((int)(((low << 1) & 0x10) | ((high << 3) & 0x08) | (low >> 61)), 5); + + r3 = r0 + SignExtend((int)(high >> 7), 5); + g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0xF)), 5); + b3 = b0 + SignExtend((int)( + ((low) & 0x10) | + ((high >> 9) & 0x08) | + ((high >> 4) & 0x04) | + ((low >> 59) & 0x02) | + ((low >> 50) & 0x01)), 5); + + r0 = Unquantize(r0, 10, signed); + g0 = Unquantize(g0, 10, signed); + b0 = Unquantize(b0, 10, signed); + + r1 = Unquantize(r1 & 0x3FF, 10, signed); + g1 = Unquantize(g1 & 0x3FF, 10, signed); + b1 = Unquantize(b1 & 0x3FF, 10, signed); + + r2 = Unquantize(r2 & 0x3FF, 10, signed); + g2 = Unquantize(g2 & 0x3FF, 10, signed); + b2 = Unquantize(b2 & 0x3FF, 10, signed); + + r3 = Unquantize(r3 & 0x3FF, 10, signed); + g3 = Unquantize(g3 & 0x3FF, 10, signed); + b3 = Unquantize(b3 & 0x3FF, 10, signed); + + subsetCount = 2; + break; + case 1: + r0 = (int)(low >> 5) & 0x7F; + g0 = (int)(low >> 15) & 0x7F; + b0 = (int)(low >> 25) & 0x7F; + + if (signed) + { + r0 = SignExtend(r0, 7); + g0 = SignExtend(g0, 7); + b0 = SignExtend(b0, 7); + } + + r1 = r0 + SignExtend((int)(low >> 35), 6); + g1 = g0 + SignExtend((int)(low >> 45), 6); + b1 = b0 + SignExtend((int)(low >> 55), 6); + + r2 = r0 + SignExtend((int)(high >> 1), 6); + g2 = g0 + SignExtend((int)(((low << 3) & 0x20) | ((low >> 20) & 0x10) | ((low >> 41) & 0x0F)), 6); + b2 = b0 + SignExtend((int)( + ((low >> 17) & 0x20) | + ((low >> 10) & 0x10) | + ((high << 3) & 0x08) | + (low >> 61)), 6); + + r3 = r0 + SignExtend((int)(high >> 7), 6); + g3 = g0 + SignExtend((int)(((low << 1) & 0x30) | ((low >> 51) & 0xF)), 6); + b3 = b0 + SignExtend((int)( + ((low >> 28) & 0x20) | + ((low >> 30) & 
0x10) | + ((low >> 29) & 0x08) | + ((low >> 21) & 0x04) | + ((low >> 12) & 0x03)), 6); + + r0 = Unquantize(r0, 7, signed); + g0 = Unquantize(g0, 7, signed); + b0 = Unquantize(b0, 7, signed); + + r1 = Unquantize(r1 & 0x7F, 7, signed); + g1 = Unquantize(g1 & 0x7F, 7, signed); + b1 = Unquantize(b1 & 0x7F, 7, signed); + + r2 = Unquantize(r2 & 0x7F, 7, signed); + g2 = Unquantize(g2 & 0x7F, 7, signed); + b2 = Unquantize(b2 & 0x7F, 7, signed); + + r3 = Unquantize(r3 & 0x7F, 7, signed); + g3 = Unquantize(g3 & 0x7F, 7, signed); + b3 = Unquantize(b3 & 0x7F, 7, signed); + + subsetCount = 2; + break; + case 2: + r0 = (int)(((low >> 30) & 0x400) | ((low >> 5) & 0x3FF)); + g0 = (int)(((low >> 39) & 0x400) | ((low >> 15) & 0x3FF)); + b0 = (int)(((low >> 49) & 0x400) | ((low >> 25) & 0x3FF)); + + if (signed) + { + r0 = SignExtend(r0, 11); + g0 = SignExtend(g0, 11); + b0 = SignExtend(b0, 11); + } + + r1 = r0 + SignExtend((int)(low >> 35), 5); + g1 = g0 + SignExtend((int)(low >> 45), 4); + b1 = b0 + SignExtend((int)(low >> 55), 4); + + r2 = r0 + SignExtend((int)(high >> 1), 5); + g2 = g0 + SignExtend((int)(low >> 41), 4); + b2 = b0 + SignExtend((int)(((high << 3) & 8) | (low >> 61)), 4); + + r3 = r0 + SignExtend((int)(high >> 7), 5); + g3 = g0 + SignExtend((int)(low >> 51), 4); + b3 = b0 + SignExtend((int)( + ((high >> 9) & 8) | + ((high >> 4) & 4) | + ((low >> 59) & 2) | + ((low >> 50) & 1)), 4); + + r0 = Unquantize(r0, 11, signed); + g0 = Unquantize(g0, 11, signed); + b0 = Unquantize(b0, 11, signed); + + r1 = Unquantize(r1 & 0x7FF, 11, signed); + g1 = Unquantize(g1 & 0x7FF, 11, signed); + b1 = Unquantize(b1 & 0x7FF, 11, signed); + + r2 = Unquantize(r2 & 0x7FF, 11, signed); + g2 = Unquantize(g2 & 0x7FF, 11, signed); + b2 = Unquantize(b2 & 0x7FF, 11, signed); + + r3 = Unquantize(r3 & 0x7FF, 11, signed); + g3 = Unquantize(g3 & 0x7FF, 11, signed); + b3 = Unquantize(b3 & 0x7FF, 11, signed); + + subsetCount = 2; + break; + case 3: + r0 = (int)(low >> 5) & 0x3FF; + g0 = (int)(low >> 15) 
& 0x3FF; + b0 = (int)(low >> 25) & 0x3FF; + + r1 = (int)(low >> 35) & 0x3FF; + g1 = (int)(low >> 45) & 0x3FF; + b1 = (int)(((high << 9) & 0x200) | (low >> 55)); + + if (signed) + { + r0 = SignExtend(r0, 10); + g0 = SignExtend(g0, 10); + b0 = SignExtend(b0, 10); + + r1 = SignExtend(r1, 10); + g1 = SignExtend(g1, 10); + b1 = SignExtend(b1, 10); + } + + r0 = Unquantize(r0, 10, signed); + g0 = Unquantize(g0, 10, signed); + b0 = Unquantize(b0, 10, signed); + + r1 = Unquantize(r1, 10, signed); + g1 = Unquantize(g1, 10, signed); + b1 = Unquantize(b1, 10, signed); + + subsetCount = 1; + break; + case 6: + r0 = (int)(((low >> 29) & 0x400) | ((low >> 5) & 0x3FF)); + g0 = (int)(((low >> 40) & 0x400) | ((low >> 15) & 0x3FF)); + b0 = (int)(((low >> 49) & 0x400) | ((low >> 25) & 0x3FF)); + + if (signed) + { + r0 = SignExtend(r0, 11); + g0 = SignExtend(g0, 11); + b0 = SignExtend(b0, 11); + } + + r1 = r0 + SignExtend((int)(low >> 35), 4); + g1 = g0 + SignExtend((int)(low >> 45), 5); + b1 = b0 + SignExtend((int)(low >> 55), 4); + + r2 = r0 + SignExtend((int)(high >> 1), 4); + g2 = g0 + SignExtend((int)(((high >> 7) & 0x10) | ((low >> 41) & 0x0F)), 5); + b2 = b0 + SignExtend((int)(((high << 3) & 0x08) | ((low >> 61))), 4); + + r3 = r0 + SignExtend((int)(high >> 7), 4); + g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0x0F)), 5); + b3 = b0 + SignExtend((int)( + ((high >> 9) & 8) | + ((high >> 4) & 4) | + ((low >> 59) & 2) | + ((high >> 5) & 1)), 4); + + r0 = Unquantize(r0, 11, signed); + g0 = Unquantize(g0, 11, signed); + b0 = Unquantize(b0, 11, signed); + + r1 = Unquantize(r1 & 0x7FF, 11, signed); + g1 = Unquantize(g1 & 0x7FF, 11, signed); + b1 = Unquantize(b1 & 0x7FF, 11, signed); + + r2 = Unquantize(r2 & 0x7FF, 11, signed); + g2 = Unquantize(g2 & 0x7FF, 11, signed); + b2 = Unquantize(b2 & 0x7FF, 11, signed); + + r3 = Unquantize(r3 & 0x7FF, 11, signed); + g3 = Unquantize(g3 & 0x7FF, 11, signed); + b3 = Unquantize(b3 & 0x7FF, 11, signed); + + subsetCount = 2; + 
break; + case 7: + r0 = (int)(((low >> 34) & 0x400) | ((low >> 5) & 0x3FF)); + g0 = (int)(((low >> 44) & 0x400) | ((low >> 15) & 0x3FF)); + b0 = (int)(((high << 10) & 0x400) | ((low >> 25) & 0x3FF)); + + if (signed) + { + r0 = SignExtend(r0, 11); + g0 = SignExtend(g0, 11); + b0 = SignExtend(b0, 11); + } + + r1 = (r0 + SignExtend((int)(low >> 35), 9)) & 0x7FF; + g1 = (g0 + SignExtend((int)(low >> 45), 9)) & 0x7FF; + b1 = (b0 + SignExtend((int)(low >> 55), 9)) & 0x7FF; + + r0 = Unquantize(r0, 11, signed); + g0 = Unquantize(g0, 11, signed); + b0 = Unquantize(b0, 11, signed); + + r1 = Unquantize(r1, 11, signed); + g1 = Unquantize(g1, 11, signed); + b1 = Unquantize(b1, 11, signed); + + subsetCount = 1; + break; + case 10: + r0 = (int)(((low >> 29) & 0x400) | ((low >> 5) & 0x3FF)); + g0 = (int)(((low >> 39) & 0x400) | ((low >> 15) & 0x3FF)); + b0 = (int)(((low >> 50) & 0x400) | ((low >> 25) & 0x3FF)); + + if (signed) + { + r0 = SignExtend(r0, 11); + g0 = SignExtend(g0, 11); + b0 = SignExtend(b0, 11); + } + + r1 = r0 + SignExtend((int)(low >> 35), 4); + g1 = g0 + SignExtend((int)(low >> 45), 4); + b1 = b0 + SignExtend((int)(low >> 55), 5); + + r2 = r0 + SignExtend((int)(high >> 1), 4); + g2 = g0 + SignExtend((int)(low >> 41), 4); + b2 = b0 + SignExtend((int)(((low >> 36) & 0x10) | ((high << 3) & 8) | (low >> 61)), 5); + + r3 = r0 + SignExtend((int)(high >> 7), 4); + g3 = g0 + SignExtend((int)(low >> 51), 4); + b3 = b0 + SignExtend((int)( + ((high >> 7) & 0x10) | + ((high >> 9) & 0x08) | + ((high >> 4) & 0x06) | + ((low >> 50) & 0x01)), 5); + + r0 = Unquantize(r0, 11, signed); + g0 = Unquantize(g0, 11, signed); + b0 = Unquantize(b0, 11, signed); + + r1 = Unquantize(r1 & 0x7FF, 11, signed); + g1 = Unquantize(g1 & 0x7FF, 11, signed); + b1 = Unquantize(b1 & 0x7FF, 11, signed); + + r2 = Unquantize(r2 & 0x7FF, 11, signed); + g2 = Unquantize(g2 & 0x7FF, 11, signed); + b2 = Unquantize(b2 & 0x7FF, 11, signed); + + r3 = Unquantize(r3 & 0x7FF, 11, signed); + g3 = Unquantize(g3 & 
0x7FF, 11, signed); + b3 = Unquantize(b3 & 0x7FF, 11, signed); + + subsetCount = 2; + break; + case 11: + r0 = (int)(((low >> 32) & 0x800) | ((low >> 34) & 0x400) | ((low >> 5) & 0x3FF)); + g0 = (int)(((low >> 42) & 0x800) | ((low >> 44) & 0x400) | ((low >> 15) & 0x3FF)); + b0 = (int)(((low >> 52) & 0x800) | ((high << 10) & 0x400) | ((low >> 25) & 0x3FF)); + + if (signed) + { + r0 = SignExtend(r0, 12); + g0 = SignExtend(g0, 12); + b0 = SignExtend(b0, 12); + } + + r1 = (r0 + SignExtend((int)(low >> 35), 8)) & 0xFFF; + g1 = (g0 + SignExtend((int)(low >> 45), 8)) & 0xFFF; + b1 = (b0 + SignExtend((int)(low >> 55), 8)) & 0xFFF; + + r0 = Unquantize(r0, 12, signed); + g0 = Unquantize(g0, 12, signed); + b0 = Unquantize(b0, 12, signed); + + r1 = Unquantize(r1, 12, signed); + g1 = Unquantize(g1, 12, signed); + b1 = Unquantize(b1, 12, signed); + + subsetCount = 1; + break; + case 14: + r0 = (int)(low >> 5) & 0x1FF; + g0 = (int)(low >> 15) & 0x1FF; + b0 = (int)(low >> 25) & 0x1FF; + + if (signed) + { + r0 = SignExtend(r0, 9); + g0 = SignExtend(g0, 9); + b0 = SignExtend(b0, 9); + } + + r1 = r0 + SignExtend((int)(low >> 35), 5); + g1 = g0 + SignExtend((int)(low >> 45), 5); + b1 = b0 + SignExtend((int)(low >> 55), 5); + + r2 = r0 + SignExtend((int)(high >> 1), 5); + g2 = g0 + SignExtend((int)(((low >> 20) & 0x10) | ((low >> 41) & 0xF)), 5); + b2 = b0 + SignExtend((int)(((low >> 10) & 0x10) | ((high << 3) & 8) | (low >> 61)), 5); + + r3 = r0 + SignExtend((int)(high >> 7), 5); + g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0xF)), 5); + b3 = b0 + SignExtend((int)( + ((low >> 30) & 0x10) | + ((high >> 9) & 0x08) | + ((high >> 4) & 0x04) | + ((low >> 59) & 0x02) | + ((low >> 50) & 0x01)), 5); + + r0 = Unquantize(r0, 9, signed); + g0 = Unquantize(g0, 9, signed); + b0 = Unquantize(b0, 9, signed); + + r1 = Unquantize(r1 & 0x1FF, 9, signed); + g1 = Unquantize(g1 & 0x1FF, 9, signed); + b1 = Unquantize(b1 & 0x1FF, 9, signed); + + r2 = Unquantize(r2 & 0x1FF, 9, signed); + 
g2 = Unquantize(g2 & 0x1FF, 9, signed); + b2 = Unquantize(b2 & 0x1FF, 9, signed); + + r3 = Unquantize(r3 & 0x1FF, 9, signed); + g3 = Unquantize(g3 & 0x1FF, 9, signed); + b3 = Unquantize(b3 & 0x1FF, 9, signed); + + subsetCount = 2; + break; + case 15: + r0 = (BitReverse6((int)(low >> 39) & 0x3F) << 10) | ((int)(low >> 5) & 0x3FF); + g0 = (BitReverse6((int)(low >> 49) & 0x3F) << 10) | ((int)(low >> 15) & 0x3FF); + b0 = ((BitReverse6((int)(low >> 59)) | (int)(high & 1)) << 10) | ((int)(low >> 25) & 0x3FF); + + if (signed) + { + r0 = SignExtend(r0, 16); + g0 = SignExtend(g0, 16); + b0 = SignExtend(b0, 16); + } + + r1 = (r0 + SignExtend((int)(low >> 35), 4)) & 0xFFFF; + g1 = (g0 + SignExtend((int)(low >> 45), 4)) & 0xFFFF; + b1 = (b0 + SignExtend((int)(low >> 55), 4)) & 0xFFFF; + + subsetCount = 1; + break; + case 18: + r0 = (int)(low >> 5) & 0xFF; + g0 = (int)(low >> 15) & 0xFF; + b0 = (int)(low >> 25) & 0xFF; + + if (signed) + { + r0 = SignExtend(r0, 8); + g0 = SignExtend(g0, 8); + b0 = SignExtend(b0, 8); + } + + r1 = r0 + SignExtend((int)(low >> 35), 6); + g1 = g0 + SignExtend((int)(low >> 45), 5); + b1 = b0 + SignExtend((int)(low >> 55), 5); + + r2 = r0 + SignExtend((int)(high >> 1), 6); + g2 = g0 + SignExtend((int)(((low >> 20) & 0x10) | ((low >> 41) & 0xF)), 5); + b2 = b0 + SignExtend((int)(((low >> 10) & 0x10) | ((high << 3) & 8) | (low >> 61)), 5); + + r3 = r0 + SignExtend((int)(high >> 7), 6); + g3 = g0 + SignExtend((int)(((low >> 9) & 0x10) | ((low >> 51) & 0xF)), 5); + b3 = b0 + SignExtend((int)( + ((low >> 30) & 0x18) | + ((low >> 21) & 0x04) | + ((low >> 59) & 0x02) | + ((low >> 50) & 0x01)), 5); + + r0 = Unquantize(r0, 8, signed); + g0 = Unquantize(g0, 8, signed); + b0 = Unquantize(b0, 8, signed); + + r1 = Unquantize(r1 & 0xFF, 8, signed); + g1 = Unquantize(g1 & 0xFF, 8, signed); + b1 = Unquantize(b1 & 0xFF, 8, signed); + + r2 = Unquantize(r2 & 0xFF, 8, signed); + g2 = Unquantize(g2 & 0xFF, 8, signed); + b2 = Unquantize(b2 & 0xFF, 8, signed); + + r3 = 
Unquantize(r3 & 0xFF, 8, signed); + g3 = Unquantize(g3 & 0xFF, 8, signed); + b3 = Unquantize(b3 & 0xFF, 8, signed); + + subsetCount = 2; + break; + case 22: + r0 = (int)(low >> 5) & 0xFF; + g0 = (int)(low >> 15) & 0xFF; + b0 = (int)(low >> 25) & 0xFF; + + if (signed) + { + r0 = SignExtend(r0, 8); + g0 = SignExtend(g0, 8); + b0 = SignExtend(b0, 8); + } + + r1 = r0 + SignExtend((int)(low >> 35), 5); + g1 = g0 + SignExtend((int)(low >> 45), 6); + b1 = b0 + SignExtend((int)(low >> 55), 5); + + r2 = r0 + SignExtend((int)(high >> 1), 5); + g2 = g0 + SignExtend((int)(((low >> 18) & 0x20) | ((low >> 20) & 0x10) | ((low >> 41) & 0xF)), 6); + b2 = b0 + SignExtend((int)(((low >> 10) & 0x10) | ((high << 3) & 0x08) | (low >> 61)), 5); + + r3 = r0 + SignExtend((int)(high >> 7), 5); + g3 = g0 + SignExtend((int)(((low >> 28) & 0x20) | ((low >> 36) & 0x10) | ((low >> 51) & 0x0F)), 6); + b3 = b0 + SignExtend((int)( + ((low >> 30) & 0x10) | + ((high >> 9) & 0x08) | + ((high >> 4) & 0x04) | + ((low >> 59) & 0x02) | + ((low >> 13) & 0x01)), 5); + + r0 = Unquantize(r0, 8, signed); + g0 = Unquantize(g0, 8, signed); + b0 = Unquantize(b0, 8, signed); + + r1 = Unquantize(r1 & 0xFF, 8, signed); + g1 = Unquantize(g1 & 0xFF, 8, signed); + b1 = Unquantize(b1 & 0xFF, 8, signed); + + r2 = Unquantize(r2 & 0xFF, 8, signed); + g2 = Unquantize(g2 & 0xFF, 8, signed); + b2 = Unquantize(b2 & 0xFF, 8, signed); + + r3 = Unquantize(r3 & 0xFF, 8, signed); + g3 = Unquantize(g3 & 0xFF, 8, signed); + b3 = Unquantize(b3 & 0xFF, 8, signed); + + subsetCount = 2; + break; + case 26: + r0 = (int)(low >> 5) & 0xFF; + g0 = (int)(low >> 15) & 0xFF; + b0 = (int)(low >> 25) & 0xFF; + + if (signed) + { + r0 = SignExtend(r0, 8); + g0 = SignExtend(g0, 8); + b0 = SignExtend(b0, 8); + } + + r1 = r0 + SignExtend((int)(low >> 35), 5); + g1 = g0 + SignExtend((int)(low >> 45), 5); + b1 = b0 + SignExtend((int)(low >> 55), 6); + + r2 = r0 + SignExtend((int)(high >> 1), 5); + g2 = g0 + SignExtend((int)(((low >> 20) & 0x10) | ((low 
>> 41) & 0xF)), 5); + b2 = b0 + SignExtend((int)( + ((low >> 18) & 0x20) | + ((low >> 10) & 0x10) | + ((high << 3) & 0x08) | + (low >> 61)), 6); + + r3 = r0 + SignExtend((int)(high >> 7), 5); + g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0xF)), 5); + b3 = b0 + SignExtend((int)( + ((low >> 28) & 0x20) | + ((low >> 30) & 0x10) | + ((high >> 9) & 0x08) | + ((high >> 4) & 0x04) | + ((low >> 12) & 0x02) | + ((low >> 50) & 0x01)), 6); + + r0 = Unquantize(r0, 8, signed); + g0 = Unquantize(g0, 8, signed); + b0 = Unquantize(b0, 8, signed); + + r1 = Unquantize(r1 & 0xFF, 8, signed); + g1 = Unquantize(g1 & 0xFF, 8, signed); + b1 = Unquantize(b1 & 0xFF, 8, signed); + + r2 = Unquantize(r2 & 0xFF, 8, signed); + g2 = Unquantize(g2 & 0xFF, 8, signed); + b2 = Unquantize(b2 & 0xFF, 8, signed); + + r3 = Unquantize(r3 & 0xFF, 8, signed); + g3 = Unquantize(g3 & 0xFF, 8, signed); + b3 = Unquantize(b3 & 0xFF, 8, signed); + + subsetCount = 2; + break; + case 30: + r0 = (int)(low >> 5) & 0x3F; + g0 = (int)(low >> 15) & 0x3F; + b0 = (int)(low >> 25) & 0x3F; + + r1 = (int)(low >> 35) & 0x3F; + g1 = (int)(low >> 45) & 0x3F; + b1 = (int)(low >> 55) & 0x3F; + + r2 = (int)(high >> 1) & 0x3F; + g2 = (int)(((low >> 16) & 0x20) | ((low >> 20) & 0x10) | ((low >> 41) & 0xF)); + b2 = (int)(((low >> 17) & 0x20) | ((low >> 10) & 0x10) | ((high << 3) & 0x08) | (low >> 61)); + + r3 = (int)(high >> 7) & 0x3F; + g3 = (int)(((low >> 26) & 0x20) | ((low >> 7) & 0x10) | ((low >> 51) & 0xF)); + b3 = (int)( + ((low >> 28) & 0x20) | + ((low >> 30) & 0x10) | + ((low >> 29) & 0x08) | + ((low >> 21) & 0x04) | + ((low >> 12) & 0x03)); + + if (signed) + { + r0 = SignExtend(r0, 6); + g0 = SignExtend(g0, 6); + b0 = SignExtend(b0, 6); + + r1 = SignExtend(r1, 6); + g1 = SignExtend(g1, 6); + b1 = SignExtend(b1, 6); + + r2 = SignExtend(r2, 6); + g2 = SignExtend(g2, 6); + b2 = SignExtend(b2, 6); + + r3 = SignExtend(r3, 6); + g3 = SignExtend(g3, 6); + b3 = SignExtend(b3, 6); + } + + r0 = Unquantize(r0, 6, 
signed); + g0 = Unquantize(g0, 6, signed); + b0 = Unquantize(b0, 6, signed); + + r1 = Unquantize(r1, 6, signed); + g1 = Unquantize(g1, 6, signed); + b1 = Unquantize(b1, 6, signed); + + r2 = Unquantize(r2, 6, signed); + g2 = Unquantize(g2, 6, signed); + b2 = Unquantize(b2, 6, signed); + + r3 = Unquantize(r3, 6, signed); + g3 = Unquantize(g3, 6, signed); + b3 = Unquantize(b3, 6, signed); + + subsetCount = 2; + break; + default: + subsetCount = 0; + break; + } + + if (subsetCount > 0) + { + endPoints[0] = new RgbaColor32(r0, g0, b0, HalfOne); + endPoints[1] = new RgbaColor32(r1, g1, b1, HalfOne); + + if (subsetCount > 1) + { + endPoints[2] = new RgbaColor32(r2, g2, b2, HalfOne); + endPoints[3] = new RgbaColor32(r3, g3, b3, HalfOne); + } + } + + return subsetCount; + } + + private static int SignExtend(int value, int bits) + { + int shift = 32 - bits; + return (value << shift) >> shift; + } + + private static int Unquantize(int value, int bits, bool signed) + { + if (signed) + { + if (bits >= 16) + { + return value; + } + else + { + bool sign = value < 0; + + if (sign) + { + value = -value; + } + + if (value == 0) + { + return value; + } + else if (value >= ((1 << (bits - 1)) - 1)) + { + value = 0x7FFF; + } + else + { + value = ((value << 15) + 0x4000) >> (bits - 1); + } + + if (sign) + { + value = -value; + } + } + } + else + { + if (bits >= 15 || value == 0) + { + return value; + } + else if (value == ((1 << bits) - 1)) + { + return 0xFFFF; + } + else + { + return ((value << 16) + 0x8000) >> bits; + } + } + + return value; + } + + private static ushort FinishUnquantize(int value, bool signed) + { + if (signed) + { + value = value < 0 ? 
-((-value * 31) >> 5) : (value * 31) >> 5;
+
+                int sign = 0;
+                if (value < 0)
+                {
+                    sign = 0x8000;
+                    value = -value;
+                }
+
+                return (ushort)(sign | value);
+            }
+            else
+            {
+                return (ushort)((value * 31) >> 6);
+            }
+        }
+
+        private static int BitReverse6(int value)
+        {
+            value = ((value >> 1) & 0x55) | ((value << 1) & 0xaa);
+            value = ((value >> 2) & 0x33) | ((value << 2) & 0xcc);
+            value = ((value >> 4) & 0x0f) | ((value << 4) & 0xf0);
+            return value >> 2; // The swaps above reverse the low 8 bits; dropping 2 leaves the low 6 input bits reversed.
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Texture/BC7Decoder.cs b/Ryujinx.Graphics.Texture/BC7Decoder.cs
new file mode 100644
index 000000000..060d1ab85
--- /dev/null
+++ b/Ryujinx.Graphics.Texture/BC7Decoder.cs
@@ -0,0 +1,237 @@
+using Ryujinx.Graphics.Texture.Utils;
+using System.Diagnostics;
+using System;
+using System.Numerics;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Texture
+{
+    static class BC7Decoder
+    {
+        /// <summary>
+        /// Decodes an image made of 4x4 BC7 blocks into 32-bit texels.
+        /// </summary>
+        /// <param name="output">Destination buffer, written as one 32-bit texel per pixel with <paramref name="width"/> texels per row</param>
+        /// <param name="data">Compressed blocks, stored row by row with (width + 3) / 4 blocks per row</param>
+        /// <param name="width">Width of the image in pixels</param>
+        /// <param name="height">Height of the image in pixels</param>
+        public static void Decode(Span<byte> output, ReadOnlySpan<byte> data, int width, int height)
+        {
+            ReadOnlySpan<Block> blocks = MemoryMarshal.Cast<byte, Block>(data);
+
+            Span<uint> output32 = MemoryMarshal.Cast<byte, uint>(output);
+
+            int wInBlocks = (width + 3) / 4;
+            int hInBlocks = (height + 3) / 4;
+
+            for (int y = 0; y < hInBlocks; y++)
+            {
+                int y2 = y * 4;
+                int bh = Math.Min(4, height - y2);
+
+                for (int x = 0; x < wInBlocks; x++)
+                {
+                    int x2 = x * 4;
+                    int bw = Math.Min(4, width - x2);
+
+                    DecodeBlock(blocks[y * wInBlocks + x], output32.Slice(y2 * width + x2), bw, bh, width);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Decodes one block, writing <paramref name="w"/> x <paramref name="h"/> texels starting at the first element of <paramref name="output"/>.
+        /// </summary>
+        /// <param name="block">Compressed block to decode</param>
+        /// <param name="output">Destination texels, positioned at the top-left texel of the block</param>
+        /// <param name="w">Number of texel columns to write (at most 4)</param>
+        /// <param name="h">Number of texel rows to write (at most 4)</param>
+        /// <param name="width">Row stride of <paramref name="output"/>, in texels</param>
+        private static void DecodeBlock(Block block, Span<uint> output, int w, int h, int width)
+        {
+            // The mode number is the index of the lowest set bit of the first byte.
+            // OR-ing 0x100 makes the count come out as 8 when that byte is zero.
+            int mode = BitOperations.TrailingZeroCount((byte)block.Low | 0x100);
+            if (mode == 8)
+            {
+                // Mode is invalid, the spec mandates that hardware fills the block with
+                // a transparent black color.
+ for (int ty = 0; ty < h; ty++) + { + int baseOffs = ty * width; + + for (int tx = 0; tx < w; tx++) + { + int offs = baseOffs + tx; + + output[offs] = 0; + } + } + + return; + } + + BC7ModeInfo modeInfo = BC67Tables.BC7ModeInfos[mode]; + + int offset = mode + 1; + int partition = (int)block.Decode(ref offset, modeInfo.PartitionBitCount); + int rotation = (int)block.Decode(ref offset, modeInfo.RotationBitCount); + int indexMode = (int)block.Decode(ref offset, modeInfo.IndexModeBitCount); + + Debug.Assert(partition < 64); + Debug.Assert(rotation < 4); + Debug.Assert(indexMode < 2); + + int endPointCount = modeInfo.SubsetCount * 2; + + Span endPoints = stackalloc RgbaColor32[endPointCount]; + Span pValues = stackalloc byte[modeInfo.PBits]; + + endPoints.Fill(new RgbaColor32(0, 0, 0, 255)); + + for (int i = 0; i < endPointCount; i++) + { + endPoints[i].R = (int)block.Decode(ref offset, modeInfo.ColorDepth); + } + + for (int i = 0; i < endPointCount; i++) + { + endPoints[i].G = (int)block.Decode(ref offset, modeInfo.ColorDepth); + } + + for (int i = 0; i < endPointCount; i++) + { + endPoints[i].B = (int)block.Decode(ref offset, modeInfo.ColorDepth); + } + + if (modeInfo.AlphaDepth != 0) + { + for (int i = 0; i < endPointCount; i++) + { + endPoints[i].A = (int)block.Decode(ref offset, modeInfo.AlphaDepth); + } + } + + for (int i = 0; i < modeInfo.PBits; i++) + { + pValues[i] = (byte)block.Decode(ref offset, 1); + } + + for (int i = 0; i < endPointCount; i++) + { + int pBit = -1; + + if (modeInfo.PBits != 0) + { + int pIndex = (i * modeInfo.PBits) / endPointCount; + pBit = pValues[pIndex]; + } + + Unquantize(ref endPoints[i], modeInfo.ColorDepth, modeInfo.AlphaDepth, pBit); + } + + byte[] partitionTable = BC67Tables.PartitionTable[modeInfo.SubsetCount - 1][partition]; + byte[] fixUpTable = BC67Tables.FixUpIndices[modeInfo.SubsetCount - 1][partition]; + + Span colorIndices = stackalloc byte[16]; + + for (int i = 0; i < 16; i++) + { + byte subset = partitionTable[i]; + int 
bitCount = i == fixUpTable[subset] ? modeInfo.ColorIndexBitCount - 1 : modeInfo.ColorIndexBitCount; + + colorIndices[i] = (byte)block.Decode(ref offset, bitCount); + Debug.Assert(colorIndices[i] < 16); + } + + Span alphaIndices = stackalloc byte[16]; + + if (modeInfo.AlphaIndexBitCount != 0) + { + for (int i = 0; i < 16; i++) + { + int bitCount = i != 0 ? modeInfo.AlphaIndexBitCount : modeInfo.AlphaIndexBitCount - 1; + + alphaIndices[i] = (byte)block.Decode(ref offset, bitCount); + Debug.Assert(alphaIndices[i] < 16); + } + } + + for (int ty = 0; ty < h; ty++) + { + int baseOffs = ty * width; + + for (int tx = 0; tx < w; tx++) + { + int i = ty * 4 + tx; + + RgbaColor32 color; + + byte subset = partitionTable[i]; + + RgbaColor32 color1 = endPoints[subset * 2]; + RgbaColor32 color2 = endPoints[subset * 2 + 1]; + + if (modeInfo.AlphaIndexBitCount != 0) + { + if (indexMode == 0) + { + color = BC67Utils.Interpolate(color1, color2, colorIndices[i], alphaIndices[i], modeInfo.ColorIndexBitCount, modeInfo.AlphaIndexBitCount); + } + else + { + color = BC67Utils.Interpolate(color1, color2, alphaIndices[i], colorIndices[i], modeInfo.AlphaIndexBitCount, modeInfo.ColorIndexBitCount); + } + } + else + { + color = BC67Utils.Interpolate(color1, color2, colorIndices[i], colorIndices[i], modeInfo.ColorIndexBitCount, modeInfo.ColorIndexBitCount); + } + + if (rotation != 0) + { + int a = color.A; + + switch (rotation) + { + case 1: color.A = color.R; color.R = a; break; + case 2: color.A = color.G; color.G = a; break; + case 3: color.A = color.B; color.B = a; break; + } + } + + RgbaColor8 color8 = color.GetColor8(); + + output[baseOffs + tx] = color8.ToUInt32(); + } + } + } + + private static void Unquantize(ref RgbaColor32 color, int colorDepth, int alphaDepth, int pBit) + { + color.R = UnquantizeComponent(color.R, colorDepth, pBit); + color.G = UnquantizeComponent(color.G, colorDepth, pBit); + color.B = UnquantizeComponent(color.B, colorDepth, pBit); + color.A = alphaDepth != 0 ? 
UnquantizeComponent(color.A, alphaDepth, pBit) : 255; + } + + private static int UnquantizeComponent(int component, int bits, int pBit) + { + int shift = 8 - bits; + int value = component << shift; + + if (pBit >= 0) + { + Debug.Assert(pBit <= 1); + value |= value >> (bits + 1); + value |= pBit << (shift - 1); + } + else + { + value |= value >> bits; + } + + return value; + } + } +} diff --git a/Ryujinx.Graphics.Texture/BCnDecoder.cs b/Ryujinx.Graphics.Texture/BCnDecoder.cs index b840cac89..b21fa4d10 100644 --- a/Ryujinx.Graphics.Texture/BCnDecoder.cs +++ b/Ryujinx.Graphics.Texture/BCnDecoder.cs @@ -298,9 +298,12 @@ namespace Ryujinx.Graphics.Texture for (int l = 0; l < levels; l++) { - size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers; + size += BitUtils.AlignUp(Math.Max(1, width >> l), 4) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers; } + // Backends currently expect a stride alignment of 4 bytes, so output width must be aligned. 
+ int alignedWidth = BitUtils.AlignUp(width, 4); + byte[] output = new byte[size]; Span outputSpan = new Span(output); @@ -331,14 +334,14 @@ namespace Ryujinx.Graphics.Texture { int baseY = y * BlockHeight; int copyHeight = Math.Min(BlockHeight, height - baseY); - int lineBaseOOffs = imageBaseOOffs + baseY * width; + int lineBaseOOffs = imageBaseOOffs + baseY * alignedWidth; if (copyHeight == 4) { outputLine0 = MemoryMarshal.Cast(outputSpan.Slice(lineBaseOOffs)); - outputLine1 = MemoryMarshal.Cast(outputSpan.Slice(lineBaseOOffs + width)); - outputLine2 = MemoryMarshal.Cast(outputSpan.Slice(lineBaseOOffs + width * 2)); - outputLine3 = MemoryMarshal.Cast(outputSpan.Slice(lineBaseOOffs + width * 3)); + outputLine1 = MemoryMarshal.Cast(outputSpan.Slice(lineBaseOOffs + alignedWidth)); + outputLine2 = MemoryMarshal.Cast(outputSpan.Slice(lineBaseOOffs + alignedWidth * 2)); + outputLine3 = MemoryMarshal.Cast(outputSpan.Slice(lineBaseOOffs + alignedWidth * 3)); } for (int x = 0; x < w; x++) @@ -375,7 +378,7 @@ namespace Ryujinx.Graphics.Texture for (int tY = 0; tY < copyHeight; tY++) { - tile.Slice(tY * 4, copyWidth).CopyTo(outputSpan.Slice(pixelBaseOOffs + width * tY, copyWidth)); + tile.Slice(tY * 4, copyWidth).CopyTo(outputSpan.Slice(pixelBaseOOffs + alignedWidth * tY, copyWidth)); } } @@ -383,13 +386,15 @@ namespace Ryujinx.Graphics.Texture } } - imageBaseOOffs += width * height; + imageBaseOOffs += alignedWidth * height; } } width = Math.Max(1, width >> 1); height = Math.Max(1, height >> 1); depth = Math.Max(1, depth >> 1); + + alignedWidth = BitUtils.AlignUp(width, 4); } return output; @@ -401,9 +406,12 @@ namespace Ryujinx.Graphics.Texture for (int l = 0; l < levels; l++) { - size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 2; + size += BitUtils.AlignUp(Math.Max(1, width >> l), 2) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 2; } + // Backends currently expect a stride alignment of 4 bytes, so output 
width must be aligned. + int alignedWidth = BitUtils.AlignUp(width, 2); + byte[] output = new byte[size]; ReadOnlySpan data64 = MemoryMarshal.Cast(data); @@ -438,14 +446,14 @@ namespace Ryujinx.Graphics.Texture { int baseY = y * BlockHeight; int copyHeight = Math.Min(BlockHeight, height - baseY); - int lineBaseOOffs = imageBaseOOffs + baseY * width; + int lineBaseOOffs = imageBaseOOffs + baseY * alignedWidth; if (copyHeight == 4) { outputLine0 = MemoryMarshal.Cast(outputAsUshort.Slice(lineBaseOOffs)); - outputLine1 = MemoryMarshal.Cast(outputAsUshort.Slice(lineBaseOOffs + width)); - outputLine2 = MemoryMarshal.Cast(outputAsUshort.Slice(lineBaseOOffs + width * 2)); - outputLine3 = MemoryMarshal.Cast(outputAsUshort.Slice(lineBaseOOffs + width * 3)); + outputLine1 = MemoryMarshal.Cast(outputAsUshort.Slice(lineBaseOOffs + alignedWidth)); + outputLine2 = MemoryMarshal.Cast(outputAsUshort.Slice(lineBaseOOffs + alignedWidth * 2)); + outputLine3 = MemoryMarshal.Cast(outputAsUshort.Slice(lineBaseOOffs + alignedWidth * 3)); } for (int x = 0; x < w; x++) @@ -488,7 +496,7 @@ namespace Ryujinx.Graphics.Texture for (int tY = 0; tY < copyHeight; tY++) { - int line = pixelBaseOOffs + width * tY; + int line = pixelBaseOOffs + alignedWidth * tY; for (int tX = 0; tX < copyWidth; tX++) { @@ -503,7 +511,85 @@ namespace Ryujinx.Graphics.Texture } } - imageBaseOOffs += width * height; + imageBaseOOffs += alignedWidth * height; + } + } + + width = Math.Max(1, width >> 1); + height = Math.Max(1, height >> 1); + depth = Math.Max(1, depth >> 1); + + alignedWidth = BitUtils.AlignUp(width, 2); + } + + return output; + } + + public static byte[] DecodeBC6(ReadOnlySpan data, int width, int height, int depth, int levels, int layers, bool signed) + { + int size = 0; + + for (int l = 0; l < levels; l++) + { + size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 8; + } + + byte[] output = new byte[size]; + + int inputOffset = 0; + int outputOffset = 0; + + 
for (int l = 0; l < levels; l++) + { + int w = BitUtils.DivRoundUp(width, BlockWidth); + int h = BitUtils.DivRoundUp(height, BlockHeight); + + for (int l2 = 0; l2 < layers; l2++) + { + for (int z = 0; z < depth; z++) + { + BC6Decoder.Decode(output.AsSpan().Slice(outputOffset), data.Slice(inputOffset), width, height, signed); + + inputOffset += w * h * 16; + outputOffset += width * height * 8; + } + } + + width = Math.Max(1, width >> 1); + height = Math.Max(1, height >> 1); + depth = Math.Max(1, depth >> 1); + } + + return output; + } + + public static byte[] DecodeBC7(ReadOnlySpan data, int width, int height, int depth, int levels, int layers) + { + int size = 0; + + for (int l = 0; l < levels; l++) + { + size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 4; + } + + byte[] output = new byte[size]; + + int inputOffset = 0; + int outputOffset = 0; + + for (int l = 0; l < levels; l++) + { + int w = BitUtils.DivRoundUp(width, BlockWidth); + int h = BitUtils.DivRoundUp(height, BlockHeight); + + for (int l2 = 0; l2 < layers; l2++) + { + for (int z = 0; z < depth; z++) + { + BC7Decoder.Decode(output.AsSpan().Slice(outputOffset), data.Slice(inputOffset), width, height); + + inputOffset += w * h * 16; + outputOffset += width * height * 4; } } diff --git a/Ryujinx.Graphics.Texture/BCnEncoder.cs b/Ryujinx.Graphics.Texture/BCnEncoder.cs new file mode 100644 index 000000000..02b79c1b8 --- /dev/null +++ b/Ryujinx.Graphics.Texture/BCnEncoder.cs @@ -0,0 +1,60 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Texture.Encoders; +using System; + +namespace Ryujinx.Graphics.Texture +{ + public static class BCnEncoder + { + private const int BlockWidth = 4; + private const int BlockHeight = 4; + + public static byte[] EncodeBC7(byte[] data, int width, int height, int depth, int levels, int layers) + { + int size = 0; + + for (int l = 0; l < levels; l++) + { + int w = BitUtils.DivRoundUp(Math.Max(1, width >> l), BlockWidth); + int h = 
BitUtils.DivRoundUp(Math.Max(1, height >> l), BlockHeight); + + size += w * h * 16 * Math.Max(1, depth >> l) * layers; + } + + byte[] output = new byte[size]; + + int imageBaseIOffs = 0; + int imageBaseOOffs = 0; + + for (int l = 0; l < levels; l++) + { + int rgba8Size = width * height * depth * layers * 4; + + int w = BitUtils.DivRoundUp(width, BlockWidth); + int h = BitUtils.DivRoundUp(height, BlockHeight); + + for (int l2 = 0; l2 < layers; l2++) + { + for (int z = 0; z < depth; z++) + { + BC7Encoder.Encode( + output.AsMemory().Slice(imageBaseOOffs), + data.AsMemory().Slice(imageBaseIOffs), + width, + height, + EncodeMode.Fast | EncodeMode.Multithreaded); + + imageBaseIOffs += width * height * 4; + imageBaseOOffs += w * h * 16; + } + } + + width = Math.Max(1, width >> 1); + height = Math.Max(1, height >> 1); + depth = Math.Max(1, depth >> 1); + } + + return output; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Texture/Encoders/BC7Encoder.cs b/Ryujinx.Graphics.Texture/Encoders/BC7Encoder.cs new file mode 100644 index 000000000..a69c10548 --- /dev/null +++ b/Ryujinx.Graphics.Texture/Encoders/BC7Encoder.cs @@ -0,0 +1,1001 @@ +using Ryujinx.Graphics.Texture.Utils; +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Threading.Tasks; + +namespace Ryujinx.Graphics.Texture.Encoders +{ + static class BC7Encoder + { + public static void Encode(Memory outputStorage, ReadOnlyMemory data, int width, int height, EncodeMode mode) + { + int widthInBlocks = (width + 3) / 4; + int heightInBlocks = (height + 3) / 4; + + bool fastMode = (mode & EncodeMode.ModeMask) == EncodeMode.Fast; + + if (mode.HasFlag(EncodeMode.Multithreaded)) + { + Parallel.For(0, heightInBlocks, (yInBlocks) => + { + Span output = MemoryMarshal.Cast(outputStorage.Span); + int y = yInBlocks * 4; + + for (int xInBlocks = 0; xInBlocks < 
widthInBlocks; xInBlocks++) + { + int x = xInBlocks * 4; + Block block = CompressBlock(data.Span, x, y, width, height, fastMode); + + int offset = (yInBlocks * widthInBlocks + xInBlocks) * 2; + output[offset] = block.Low; + output[offset + 1] = block.High; + } + }); + } + else + { + Span output = MemoryMarshal.Cast(outputStorage.Span); + int offset = 0; + + for (int y = 0; y < height; y += 4) + { + for (int x = 0; x < width; x += 4) + { + Block block = CompressBlock(data.Span, x, y, width, height, fastMode); + + output[offset++] = block.Low; + output[offset++] = block.High; + } + } + } + } + + private static int[] _mostFrequentPartitions = new int[] + { + 0, 13, 2, 1, 15, 14, 10, 23 + }; + + private static Block CompressBlock(ReadOnlySpan data, int x, int y, int width, int height, bool fastMode) + { + int w = Math.Min(4, width - x); + int h = Math.Min(4, height - y); + + var dataUint = MemoryMarshal.Cast(data); + + int baseOffset = y * width + x; + + Span tile = stackalloc uint[w * h]; + + for (int ty = 0; ty < h; ty++) + { + int rowOffset = baseOffset + ty * width; + + for (int tx = 0; tx < w; tx++) + { + tile[ty * w + tx] = dataUint[rowOffset + tx]; + } + } + + return fastMode ? EncodeFast(tile, w, h) : EncodeExhaustive(tile, w, h); + } + + private static Block EncodeFast(ReadOnlySpan tile, int w, int h) + { + (RgbaColor8 minColor, RgbaColor8 maxColor) = BC67Utils.GetMinMaxColors(tile, w, h); + + bool alphaNotOne = minColor.A != 255 || maxColor.A != 255; + int variance = BC67Utils.SquaredDifference(minColor.GetColor32(), maxColor.GetColor32()); + int selectedMode; + int indexMode = 0; + + if (alphaNotOne) + { + bool constantAlpha = minColor.A == maxColor.A; + if (constantAlpha) + { + selectedMode = variance > 160 ? 
7 : 6; + } + else + { + if (variance > 160) + { + Span uniqueRGB = stackalloc uint[16]; + Span uniqueAlpha = stackalloc uint[16]; + + int uniqueRGBCount = 0; + int uniqueAlphaCount = 0; + + uint rgbMask = new RgbaColor8(255, 255, 255, 0).ToUInt32(); + uint alphaMask = new RgbaColor8(0, 0, 0, 255).ToUInt32(); + + for (int i = 0; i < tile.Length; i++) + { + uint c = tile[i]; + + if (!uniqueRGB.Slice(0, uniqueRGBCount).Contains(c & rgbMask)) + { + uniqueRGB[uniqueRGBCount++] = c & rgbMask; + } + + if (!uniqueAlpha.Slice(0, uniqueAlphaCount).Contains(c & alphaMask)) + { + uniqueAlpha[uniqueAlphaCount++] = c & alphaMask; + } + } + + selectedMode = 4; + indexMode = uniqueRGBCount > uniqueAlphaCount ? 1 : 0; + } + else + { + selectedMode = 5; + } + } + } + else + { + if (variance > 160) + { + selectedMode = 1; + } + else + { + selectedMode = 6; + } + } + + int selectedPartition = 0; + + if (selectedMode == 1 || selectedMode == 7) + { + int partitionSelectionLowestError = int.MaxValue; + + for (int i = 0; i < _mostFrequentPartitions.Length; i++) + { + int p = _mostFrequentPartitions[i]; + int error = GetEndPointSelectionErrorFast(tile, 2, p, w, h, partitionSelectionLowestError); + if (error < partitionSelectionLowestError) + { + partitionSelectionLowestError = error; + selectedPartition = p; + } + } + } + + return Encode(selectedMode, selectedPartition, 0, indexMode, fastMode: true, tile, w, h, out _); + } + + private static Block EncodeExhaustive(ReadOnlySpan tile, int w, int h) + { + Block bestBlock = default; + int lowestError = int.MaxValue; + int lowestErrorSubsets = int.MaxValue; + + for (int m = 0; m < 8; m++) + { + for (int r = 0; r < (m == 4 || m == 5 ? 4 : 1); r++) + { + for (int im = 0; im < (m == 4 ? 
2 : 1); im++) + { + for (int p = 0; p < 1 << BC67Tables.BC7ModeInfos[m].PartitionBitCount; p++) + { + Block block = Encode(m, p, r, im, fastMode: false, tile, w, h, out int maxError); + if (maxError < lowestError || (maxError == lowestError && BC67Tables.BC7ModeInfos[m].SubsetCount < lowestErrorSubsets)) + { + lowestError = maxError; + lowestErrorSubsets = BC67Tables.BC7ModeInfos[m].SubsetCount; + bestBlock = block; + } + } + } + } + } + + return bestBlock; + } + + private static Block Encode( + int mode, + int partition, + int rotation, + int indexMode, + bool fastMode, + ReadOnlySpan tile, + int w, + int h, + out int errorSum) + { + BC7ModeInfo modeInfo = BC67Tables.BC7ModeInfos[mode]; + int subsetCount = modeInfo.SubsetCount; + int partitionBitCount = modeInfo.PartitionBitCount; + int rotationBitCount = modeInfo.RotationBitCount; + int indexModeBitCount = modeInfo.IndexModeBitCount; + int colorDepth = modeInfo.ColorDepth; + int alphaDepth = modeInfo.AlphaDepth; + int pBits = modeInfo.PBits; + int colorIndexBitCount = modeInfo.ColorIndexBitCount; + int alphaIndexBitCount = modeInfo.AlphaIndexBitCount; + bool separateAlphaIndices = alphaIndexBitCount != 0; + + uint alphaMask; + + if (separateAlphaIndices) + { + alphaMask = rotation switch + { + 1 => new RgbaColor8(255, 0, 0, 0).ToUInt32(), + 2 => new RgbaColor8(0, 255, 0, 0).ToUInt32(), + 3 => new RgbaColor8(0, 0, 255, 0).ToUInt32(), + _ => new RgbaColor8(0, 0, 0, 255).ToUInt32() + }; + } + else + { + alphaMask = new RgbaColor8(0, 0, 0, 0).ToUInt32(); + } + + if (indexMode != 0) + { + alphaMask = ~alphaMask; + } + + // + // Select color palette. 
+ // + + Span endPoints0 = stackalloc uint[subsetCount]; + Span endPoints1 = stackalloc uint[subsetCount]; + + SelectEndPoints( + tile, + w, + h, + endPoints0, + endPoints1, + subsetCount, + partition, + colorIndexBitCount, + colorDepth, + alphaDepth, + ~alphaMask, + fastMode); + + if (separateAlphaIndices) + { + SelectEndPoints( + tile, + w, + h, + endPoints0, + endPoints1, + subsetCount, + partition, + alphaIndexBitCount, + colorDepth, + alphaDepth, + alphaMask, + fastMode); + } + + Span pBitValues = stackalloc int[pBits]; + + for (int i = 0; i < pBits; i++) + { + int pBit; + + if (pBits == subsetCount) + { + pBit = GetPBit(endPoints0[i], endPoints1[i], colorDepth, alphaDepth); + } + else + { + int subset = i >> 1; + uint color = (i & 1) == 0 ? endPoints0[subset] : endPoints1[subset]; + pBit = GetPBit(color, colorDepth, alphaDepth); + } + + pBitValues[i] = pBit; + } + + int colorIndexCount = 1 << colorIndexBitCount; + int alphaIndexCount = 1 << alphaIndexBitCount; + + Span colorIndices = stackalloc byte[16]; + Span alphaIndices = stackalloc byte[16]; + + errorSum = BC67Utils.SelectIndices( + tile, + w, + h, + endPoints0, + endPoints1, + pBitValues, + colorIndices, + subsetCount, + partition, + colorIndexBitCount, + colorIndexCount, + colorDepth, + alphaDepth, + pBits, + alphaMask); + + if (separateAlphaIndices) + { + errorSum += BC67Utils.SelectIndices( + tile, + w, + h, + endPoints0, + endPoints1, + pBitValues, + alphaIndices, + subsetCount, + partition, + alphaIndexBitCount, + alphaIndexCount, + colorDepth, + alphaDepth, + pBits, + ~alphaMask); + } + + Span colorSwapSubset = stackalloc bool[3]; + + for (int i = 0; i < 3; i++) + { + colorSwapSubset[i] = colorIndices[BC67Tables.FixUpIndices[subsetCount - 1][partition][i]] >= (colorIndexCount >> 1); + } + + bool alphaSwapSubset = alphaIndices[0] >= (alphaIndexCount >> 1); + + Block block = new Block(); + + int offset = 0; + + block.Encode(1UL << mode, ref offset, mode + 1); + block.Encode((ulong)partition, ref 
offset, partitionBitCount); + block.Encode((ulong)rotation, ref offset, rotationBitCount); + block.Encode((ulong)indexMode, ref offset, indexModeBitCount); + + for (int comp = 0; comp < 3; comp++) + { + int rotatedComp = comp; + + if (((comp + 1) & 3) == rotation) + { + rotatedComp = 3; + } + + for (int subset = 0; subset < subsetCount; subset++) + { + RgbaColor8 color0 = RgbaColor8.FromUInt32(endPoints0[subset]); + RgbaColor8 color1 = RgbaColor8.FromUInt32(endPoints1[subset]); + + int pBit0 = -1, pBit1 = -1; + + if (pBits == subsetCount) + { + pBit0 = pBit1 = pBitValues[subset]; + } + else if (pBits != 0) + { + pBit0 = pBitValues[subset * 2]; + pBit1 = pBitValues[subset * 2 + 1]; + } + + if (indexMode == 0 ? colorSwapSubset[subset] : alphaSwapSubset) + { + block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), colorDepth, pBit1), ref offset, colorDepth); + block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), colorDepth, pBit0), ref offset, colorDepth); + } + else + { + block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), colorDepth, pBit0), ref offset, colorDepth); + block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), colorDepth, pBit1), ref offset, colorDepth); + } + } + } + + if (alphaDepth != 0) + { + int rotatedComp = (rotation - 1) & 3; + + for (int subset = 0; subset < subsetCount; subset++) + { + RgbaColor8 color0 = RgbaColor8.FromUInt32(endPoints0[subset]); + RgbaColor8 color1 = RgbaColor8.FromUInt32(endPoints1[subset]); + + int pBit0 = -1, pBit1 = -1; + + if (pBits == subsetCount) + { + pBit0 = pBit1 = pBitValues[subset]; + } + else if (pBits != 0) + { + pBit0 = pBitValues[subset * 2]; + pBit1 = pBitValues[subset * 2 + 1]; + } + + if (separateAlphaIndices && indexMode == 0 ? 
alphaSwapSubset : colorSwapSubset[subset]) + { + block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), alphaDepth, pBit1), ref offset, alphaDepth); + block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), alphaDepth, pBit0), ref offset, alphaDepth); + } + else + { + block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), alphaDepth, pBit0), ref offset, alphaDepth); + block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), alphaDepth, pBit1), ref offset, alphaDepth); + } + } + } + + for (int i = 0; i < pBits; i++) + { + block.Encode((ulong)pBitValues[i], ref offset, 1); + } + + byte[] fixUpTable = BC67Tables.FixUpIndices[subsetCount - 1][partition]; + + for (int i = 0; i < 16; i++) + { + int subset = BC67Tables.PartitionTable[subsetCount - 1][partition][i]; + byte index = colorIndices[i]; + + if (colorSwapSubset[subset]) + { + index = (byte)(index ^ (colorIndexCount - 1)); + } + + int finalIndexBitCount = i == fixUpTable[subset] ? colorIndexBitCount - 1 : colorIndexBitCount; + + if (index >= (1 << finalIndexBitCount)) + { + throw new Exception("invalid index " + index); + } + + block.Encode(index, ref offset, finalIndexBitCount); + } + + if (separateAlphaIndices) + { + for (int i = 0; i < 16; i++) + { + byte index = alphaIndices[i]; + + if (alphaSwapSubset) + { + index = (byte)(index ^ (alphaIndexCount - 1)); + } + + int finalIndexBitCount = i == 0 ? 
alphaIndexBitCount - 1 : alphaIndexBitCount; + + if (index >= (1 << finalIndexBitCount)) + { + throw new Exception("invalid alpha index " + index); + } + + block.Encode(index, ref offset, finalIndexBitCount); + } + } + + return block; + } + + private static unsafe int GetEndPointSelectionErrorFast(ReadOnlySpan tile, int subsetCount, int partition, int w, int h, int maxError) + { + byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; + + Span minColors = stackalloc RgbaColor8[subsetCount]; + Span maxColors = stackalloc RgbaColor8[subsetCount]; + + BC67Utils.GetMinMaxColors(partitionTable, tile, w, h, minColors, maxColors, subsetCount); + + Span endPoints0 = stackalloc uint[subsetCount]; + Span endPoints1 = stackalloc uint[subsetCount]; + + SelectEndPointsFast(partitionTable, tile, w, h, subsetCount, minColors, maxColors, endPoints0, endPoints1, uint.MaxValue); + + Span palette = stackalloc RgbaColor32[8]; + + int errorSum = 0; + + for (int subset = 0; subset < subsetCount; subset++) + { + RgbaColor32 blockDir = maxColors[subset].GetColor32() - minColors[subset].GetColor32(); + int sum = blockDir.R + blockDir.G + blockDir.B + blockDir.A; + if (sum != 0) + { + blockDir = (blockDir << 6) / new RgbaColor32(sum); + } + + uint c0 = endPoints0[subset]; + uint c1 = endPoints1[subset]; + + int pBit0 = GetPBit(c0, 6, 0); + int pBit1 = GetPBit(c1, 6, 0); + + c0 = BC67Utils.Quantize(RgbaColor8.FromUInt32(c0), 6, 0, pBit0).ToUInt32(); + c1 = BC67Utils.Quantize(RgbaColor8.FromUInt32(c1), 6, 0, pBit1).ToUInt32(); + + if (Sse41.IsSupported) + { + Vector128 c0Rep = Vector128.Create(c0).AsByte(); + Vector128 c1Rep = Vector128.Create(c1).AsByte(); + + Vector128 c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); + + Vector128 rWeights; + Vector128 lWeights; + + fixed (byte* pWeights = BC67Tables.Weights[1], pInvWeights = BC67Tables.InverseWeights[1]) + { + rWeights = Sse2.LoadScalarVector128((ulong*)pWeights).AsByte(); + lWeights = 
Sse2.LoadScalarVector128((ulong*)pInvWeights).AsByte(); + } + + Vector128 iWeights = Sse2.UnpackLow(rWeights, lWeights); + Vector128 iWeights01 = Sse2.UnpackLow(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); + Vector128 iWeights23 = Sse2.UnpackHigh(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); + Vector128 iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); + Vector128 iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); + Vector128 iWeights2 = Sse2.UnpackLow(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); + Vector128 iWeights3 = Sse2.UnpackHigh(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); + + static Vector128 ShiftRoundToNearest(Vector128 x) + { + return Sse2.ShiftRightLogical(Sse2.Add(x, Vector128.Create((short)32)), 6); + } + + Vector128 pal0 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights0.AsSByte())); + Vector128 pal1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights1.AsSByte())); + Vector128 pal2 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights2.AsSByte())); + Vector128 pal3 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights3.AsSByte())); + + for (int i = 0; i < tile.Length; i++) + { + if (partitionTable[i] != subset) + { + continue; + } + + uint c = tile[i]; + + Vector128 color = Sse41.ConvertToVector128Int16(Vector128.Create(c).AsByte()); + + Vector128 delta0 = Sse2.Subtract(color, pal0); + Vector128 delta1 = Sse2.Subtract(color, pal1); + Vector128 delta2 = Sse2.Subtract(color, pal2); + Vector128 delta3 = Sse2.Subtract(color, pal3); + + Vector128 deltaSum0 = Sse2.MultiplyAddAdjacent(delta0, delta0); + Vector128 deltaSum1 = Sse2.MultiplyAddAdjacent(delta1, delta1); + Vector128 deltaSum2 = Sse2.MultiplyAddAdjacent(delta2, delta2); + Vector128 deltaSum3 = Sse2.MultiplyAddAdjacent(delta3, delta3); + + Vector128 deltaSum01 = Ssse3.HorizontalAdd(deltaSum0, deltaSum1); + Vector128 deltaSum23 = Ssse3.HorizontalAdd(deltaSum2, 
deltaSum3); + + Vector128 delta = Sse41.PackUnsignedSaturate(deltaSum01, deltaSum23); + + Vector128 min = Sse41.MinHorizontal(delta); + + errorSum += min.GetElement(0); + } + } + else + { + RgbaColor32 e032 = RgbaColor8.FromUInt32(c0).GetColor32(); + RgbaColor32 e132 = RgbaColor8.FromUInt32(c1).GetColor32(); + + palette[0] = e032; + palette[palette.Length - 1] = e132; + + for (int i = 1; i < palette.Length - 1; i++) + { + palette[i] = BC67Utils.Interpolate(e032, e132, i, 3); + } + + for (int i = 0; i < tile.Length; i++) + { + if (partitionTable[i] != subset) + { + continue; + } + + uint c = tile[i]; + RgbaColor32 color = Unsafe.As(ref c).GetColor32(); + + int bestMatchScore = int.MaxValue; + + for (int j = 0; j < palette.Length; j++) + { + int score = BC67Utils.SquaredDifference(color, palette[j]); + + if (score < bestMatchScore) + { + bestMatchScore = score; + } + } + + errorSum += bestMatchScore; + } + } + + // No point in continuing if we are already above maximum. + if (errorSum >= maxError) + { + return int.MaxValue; + } + } + + return errorSum; + } + + private static void SelectEndPoints( + ReadOnlySpan tile, + int w, + int h, + Span endPoints0, + Span endPoints1, + int subsetCount, + int partition, + int indexBitCount, + int colorDepth, + int alphaDepth, + uint writeMask, + bool fastMode) + { + byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; + + Span minColors = stackalloc RgbaColor8[subsetCount]; + Span maxColors = stackalloc RgbaColor8[subsetCount]; + + BC67Utils.GetMinMaxColors(partitionTable, tile, w, h, minColors, maxColors, subsetCount); + + uint inverseMask = ~writeMask; + + for (int i = 0; i < subsetCount; i++) + { + Unsafe.As(ref minColors[i]) |= inverseMask; + Unsafe.As(ref maxColors[i]) |= inverseMask; + } + + if (fastMode) + { + SelectEndPointsFast(partitionTable, tile, w, h, subsetCount, minColors, maxColors, endPoints0, endPoints1, writeMask); + } + else + { + Span colors = stackalloc RgbaColor8[subsetCount * 16]; 
+ Span counts = stackalloc byte[subsetCount]; + + int i = 0; + for (int ty = 0; ty < h; ty++) + { + for (int tx = 0; tx < w; tx++) + { + int subset = partitionTable[ty * 4 + tx]; + RgbaColor8 color = RgbaColor8.FromUInt32(tile[i++] | inverseMask); + + static void AddIfNew(Span values, RgbaColor8 value, int subset, ref byte count) + { + for (int i = 0; i < count; i++) + { + if (values[subset * 16 + i] == value) + { + return; + } + } + + values[subset * 16 + count++] = value; + } + + AddIfNew(colors, color, subset, ref counts[subset]); + } + } + + for (int subset = 0; subset < subsetCount; subset++) + { + int offset = subset * 16; + + RgbaColor8 minColor = minColors[subset]; + RgbaColor8 maxColor = maxColors[subset]; + + ReadOnlySpan subsetColors = colors.Slice(offset, counts[subset]); + + (RgbaColor8 e0, RgbaColor8 e1) = SelectEndPoints(subsetColors, minColor, maxColor, indexBitCount, colorDepth, alphaDepth, inverseMask); + + endPoints0[subset] = (endPoints0[subset] & inverseMask) | (e0.ToUInt32() & writeMask); + endPoints1[subset] = (endPoints1[subset] & inverseMask) | (e1.ToUInt32() & writeMask); + } + } + } + + private static unsafe void SelectEndPointsFast( + ReadOnlySpan partitionTable, + ReadOnlySpan tile, + int w, + int h, + int subsetCount, + ReadOnlySpan minColors, + ReadOnlySpan maxColors, + Span endPoints0, + Span endPoints1, + uint writeMask) + { + uint inverseMask = ~writeMask; + + if (Sse41.IsSupported && w == 4 && h == 4) + { + Vector128 row0, row1, row2, row3; + Vector128 ones = Vector128.AllBitsSet; + + fixed (uint* pTile = tile) + { + row0 = Sse2.LoadVector128(pTile).AsByte(); + row1 = Sse2.LoadVector128(pTile + 4).AsByte(); + row2 = Sse2.LoadVector128(pTile + 8).AsByte(); + row3 = Sse2.LoadVector128(pTile + 12).AsByte(); + } + + Vector128 partitionMask; + + fixed (byte* pPartitionTable = partitionTable) + { + partitionMask = Sse2.LoadVector128(pPartitionTable); + } + + for (int subset = 0; subset < subsetCount; subset++) + { + RgbaColor32 blockDir 
= maxColors[subset].GetColor32() - minColors[subset].GetColor32(); + int sum = blockDir.R + blockDir.G + blockDir.B + blockDir.A; + if (sum != 0) + { + blockDir = (blockDir << 6) / new RgbaColor32(sum); + } + + Vector128 bd = Vector128.Create(blockDir.GetColor8().ToUInt32()).AsByte(); + + Vector128 delta0 = Ssse3.MultiplyAddAdjacent(row0, bd.AsSByte()); + Vector128 delta1 = Ssse3.MultiplyAddAdjacent(row1, bd.AsSByte()); + Vector128 delta2 = Ssse3.MultiplyAddAdjacent(row2, bd.AsSByte()); + Vector128 delta3 = Ssse3.MultiplyAddAdjacent(row3, bd.AsSByte()); + + Vector128 delta01 = Ssse3.HorizontalAdd(delta0, delta1); + Vector128 delta23 = Ssse3.HorizontalAdd(delta2, delta3); + + Vector128 subsetMask = Sse2.Xor(Sse2.CompareEqual(partitionMask, Vector128.Create((byte)subset)), ones.AsByte()); + + Vector128 subsetMask01 = Sse2.UnpackLow(subsetMask, subsetMask).AsInt16(); + Vector128 subsetMask23 = Sse2.UnpackHigh(subsetMask, subsetMask).AsInt16(); + + Vector128 min01 = Sse41.MinHorizontal(Sse2.Or(delta01, subsetMask01).AsUInt16()); + Vector128 min23 = Sse41.MinHorizontal(Sse2.Or(delta23, subsetMask23).AsUInt16()); + Vector128 max01 = Sse41.MinHorizontal(Sse2.Xor(Sse2.AndNot(subsetMask01, delta01), ones).AsUInt16()); + Vector128 max23 = Sse41.MinHorizontal(Sse2.Xor(Sse2.AndNot(subsetMask23, delta23), ones).AsUInt16()); + + uint minPos01 = min01.AsUInt32().GetElement(0); + uint minPos23 = min23.AsUInt32().GetElement(0); + uint maxPos01 = max01.AsUInt32().GetElement(0); + uint maxPos23 = max23.AsUInt32().GetElement(0); + + uint minDistColor = (ushort)minPos23 < (ushort)minPos01 + ? tile[(int)(minPos23 >> 16) + 8] + : tile[(int)(minPos01 >> 16)]; + + // Note that we calculate the maximum as the minimum of the inverse, so less here is actually greater. + uint maxDistColor = (ushort)maxPos23 < (ushort)maxPos01 + ? 
tile[(int)(maxPos23 >> 16) + 8] + : tile[(int)(maxPos01 >> 16)]; + + endPoints0[subset] = (endPoints0[subset] & inverseMask) | (minDistColor & writeMask); + endPoints1[subset] = (endPoints1[subset] & inverseMask) | (maxDistColor & writeMask); + } + } + else + { + for (int subset = 0; subset < subsetCount; subset++) + { + RgbaColor32 blockDir = maxColors[subset].GetColor32() - minColors[subset].GetColor32(); + blockDir = RgbaColor32.DivideGuarded(blockDir << 6, new RgbaColor32(blockDir.R + blockDir.G + blockDir.B + blockDir.A), 0); + + int minDist = int.MaxValue; + int maxDist = int.MinValue; + + RgbaColor8 minDistColor = default; + RgbaColor8 maxDistColor = default; + + int i = 0; + for (int ty = 0; ty < h; ty++) + { + for (int tx = 0; tx < w; tx++, i++) + { + if (partitionTable[ty * 4 + tx] != subset) + { + continue; + } + + RgbaColor8 color = RgbaColor8.FromUInt32(tile[i]); + int dist = RgbaColor32.Dot(color.GetColor32(), blockDir); + + if (minDist > dist) + { + minDist = dist; + minDistColor = color; + } + + if (maxDist < dist) + { + maxDist = dist; + maxDistColor = color; + } + } + } + + endPoints0[subset] = (endPoints0[subset] & inverseMask) | (minDistColor.ToUInt32() & writeMask); + endPoints1[subset] = (endPoints1[subset] & inverseMask) | (maxDistColor.ToUInt32() & writeMask); + } + } + } + + private static (RgbaColor8, RgbaColor8) SelectEndPoints( + ReadOnlySpan values, + RgbaColor8 minValue, + RgbaColor8 maxValue, + int indexBitCount, + int colorDepth, + int alphaDepth, + uint alphaMask) + { + int n = values.Length; + int numInterpolatedColors = 1 << indexBitCount; + int numInterpolatedColorsMinus1 = numInterpolatedColors - 1; + + if (n == 0) + { + return (default, default); + } + + minValue = BC67Utils.Quantize(minValue, colorDepth, alphaDepth); + maxValue = BC67Utils.Quantize(maxValue, colorDepth, alphaDepth); + + RgbaColor32 blockDir = maxValue.GetColor32() - minValue.GetColor32(); + blockDir = RgbaColor32.DivideGuarded(blockDir << 6, new 
RgbaColor32(blockDir.R + blockDir.G + blockDir.B + blockDir.A), 0); + + int minDist = int.MaxValue; + int maxDist = 0; + + for (int i = 0; i < values.Length; i++) + { + RgbaColor8 color = values[i]; + int dist = RgbaColor32.Dot(BC67Utils.Quantize(color, colorDepth, alphaDepth).GetColor32(), blockDir); + + if (minDist >= dist) + { + minDist = dist; + } + + if (maxDist <= dist) + { + maxDist = dist; + } + } + + Span palette = stackalloc RgbaColor8[numInterpolatedColors]; + + int distRange = Math.Max(1, maxDist - minDist); + + RgbaColor32 nV = new RgbaColor32(n); + + int bestErrorSum = int.MaxValue; + RgbaColor8 bestE0 = default; + RgbaColor8 bestE1 = default; + + Span indices = stackalloc int[n]; + Span colors = stackalloc RgbaColor32[n]; + + for (int maxIndex = numInterpolatedColorsMinus1; maxIndex >= 1; maxIndex--) + { + int sumX = 0; + int sumXX = 0; + int sumXXIncrement = 0; + + for (int i = 0; i < values.Length; i++) + { + RgbaColor32 color = values[i].GetColor32(); + + int dist = RgbaColor32.Dot(color, blockDir); + + int normalizedValue = ((dist - minDist) << 6) / distRange; + int texelIndex = (normalizedValue * maxIndex + 32) >> 6; + + indices[i] = texelIndex; + colors[i] = color; + + sumX += texelIndex; + sumXX += texelIndex * texelIndex; + sumXXIncrement += 1 + texelIndex * 2; + } + + for (int start = 0; start < numInterpolatedColors - maxIndex; start++) + { + RgbaColor32 sumY = new RgbaColor32(0); + RgbaColor32 sumXY = new RgbaColor32(0); + + for (int i = 0; i < indices.Length; i++) + { + RgbaColor32 y = colors[i]; + + sumY += y; + sumXY += new RgbaColor32(start + indices[i]) * y; + } + + RgbaColor32 sumXV = new RgbaColor32(sumX); + RgbaColor32 sumXXV = new RgbaColor32(sumXX); + RgbaColor32 m = RgbaColor32.DivideGuarded((nV * sumXY - sumXV * sumY) << 6, nV * sumXXV - sumXV * sumXV, 0); + RgbaColor32 b = ((sumY << 6) - m * sumXV) / nV; + + RgbaColor8 candidateE0 = (b >> 6).GetColor8(); + RgbaColor8 candidateE1 = ((b + m * new 
RgbaColor32(numInterpolatedColorsMinus1)) >> 6).GetColor8(); + + int pBit0 = GetPBit(candidateE0.ToUInt32(), colorDepth, alphaDepth); + int pBit1 = GetPBit(candidateE1.ToUInt32(), colorDepth, alphaDepth); + + int errorSum = BC67Utils.SelectIndices( + MemoryMarshal.Cast(values), + candidateE0.ToUInt32(), + candidateE1.ToUInt32(), + pBit0, + pBit1, + indexBitCount, + numInterpolatedColors, + colorDepth, + alphaDepth, + alphaMask); + + if (errorSum <= bestErrorSum) + { + bestErrorSum = errorSum; + bestE0 = candidateE0; + bestE1 = candidateE1; + } + + sumX += n; + sumXX += sumXXIncrement; + sumXXIncrement += 2 * n; + } + } + + return (bestE0, bestE1); + } + + private static int GetPBit(uint color, int colorDepth, int alphaDepth) + { + uint mask = 0x808080u >> colorDepth; + + if (alphaDepth != 0) + { + mask |= 0x80000000u >> alphaDepth; + } + + color &= 0x7f7f7f7fu; + color += mask >> 1; + + int onesCount = BitOperations.PopCount(color & mask); + return onesCount >= 2 ? 1 : 0; + } + + private static int GetPBit(uint c0, uint c1, int colorDepth, int alphaDepth) + { + // Giving preference to the first endpoint yields better results, + // might be a side effect of the endpoint selection algorithm? 
+ return GetPBit(c0, colorDepth, alphaDepth); + } + } +} diff --git a/Ryujinx.Graphics.Texture/Encoders/EncodeMode.cs b/Ryujinx.Graphics.Texture/Encoders/EncodeMode.cs new file mode 100644 index 000000000..5734d301e --- /dev/null +++ b/Ryujinx.Graphics.Texture/Encoders/EncodeMode.cs @@ -0,0 +1,10 @@ +namespace Ryujinx.Graphics.Texture.Encoders +{ + enum EncodeMode + { + Fast, + Exhaustive, + ModeMask = 0xff, + Multithreaded = 1 << 8 + } +} diff --git a/Ryujinx.Graphics.Texture/Utils/BC67Tables.cs b/Ryujinx.Graphics.Texture/Utils/BC67Tables.cs new file mode 100644 index 000000000..d890652cb --- /dev/null +++ b/Ryujinx.Graphics.Texture/Utils/BC67Tables.cs @@ -0,0 +1,297 @@ +namespace Ryujinx.Graphics.Texture.Utils +{ + static class BC67Tables + { + public static readonly BC7ModeInfo[] BC7ModeInfos = new BC7ModeInfo[] + { + new BC7ModeInfo(3, 4, 6, 0, 0, 3, 0, 4, 0), + new BC7ModeInfo(2, 6, 2, 0, 0, 3, 0, 6, 0), + new BC7ModeInfo(3, 6, 0, 0, 0, 2, 0, 5, 0), + new BC7ModeInfo(2, 6, 4, 0, 0, 2, 0, 7, 0), + new BC7ModeInfo(1, 0, 0, 2, 1, 2, 3, 5, 6), + new BC7ModeInfo(1, 0, 0, 2, 0, 2, 2, 7, 8), + new BC7ModeInfo(1, 0, 2, 0, 0, 4, 0, 7, 7), + new BC7ModeInfo(2, 6, 4, 0, 0, 2, 0, 5, 5) + }; + + public static readonly byte[][] Weights = + { + new byte[] { 0, 21, 43, 64 }, + new byte[] { 0, 9, 18, 27, 37, 46, 55, 64 }, + new byte[] { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 } + }; + + public static readonly byte[][] InverseWeights = + { + new byte[] { 64, 43, 21, 0 }, + new byte[] { 64, 55, 46, 37, 27, 18, 9, 0 }, + new byte[] { 64, 60, 55, 51, 47, 43, 38, 34, 30, 26, 21, 17, 13, 9, 4, 0 } + }; + + public static readonly byte[][][] FixUpIndices = new byte[3][][] + { + new byte[64][] + { + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new 
byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, + new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 } + }, + new byte[64][] + { + new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, + new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, + new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, + new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, + new byte[] { 0, 15, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 2, 0 }, + new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 15, 0 }, + new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 2, 0 }, 
new byte[] { 0, 2, 0 }, + new byte[] { 0, 8, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 2, 0 }, + new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 6, 0 }, new byte[] { 0, 8, 0 }, + new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, + new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 2, 0 }, + new byte[] { 0, 2, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 6, 0 }, + new byte[] { 0, 6, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 6, 0 }, new byte[] { 0, 8, 0 }, + new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 2, 0 }, + new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, + new byte[] { 0, 15, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 15, 0 } + }, + new byte[64][] + { + new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 }, new byte[] { 0, 15, 8 }, new byte[] { 0, 15, 3 }, + new byte[] { 0, 8, 15 }, new byte[] { 0, 3, 15 }, new byte[] { 0, 15, 3 }, new byte[] { 0, 15, 8 }, + new byte[] { 0, 8, 15 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 6, 15 }, new byte[] { 0, 6, 15 }, + new byte[] { 0, 6, 15 }, new byte[] { 0, 5, 15 }, new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 }, + new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 15, 3 }, + new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 }, new byte[] { 0, 6, 15 }, new byte[] { 0, 10, 8 }, + new byte[] { 0, 5, 3 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 8, 6 }, new byte[] { 0, 6, 10 }, + new byte[] { 0, 8, 15 }, new byte[] { 0, 5, 15 }, new byte[] { 0, 15, 10 }, new byte[] { 0, 15, 8 }, + new byte[] { 0, 8, 15 }, new byte[] { 0, 15, 3 }, new byte[] { 0, 3, 15 }, new byte[] { 0, 5, 10 }, + new byte[] { 0, 6, 10 }, new byte[] { 0, 10, 8 }, new byte[] { 0, 8, 9 }, new byte[] { 0, 15, 10 }, + new byte[] { 0, 15, 6 }, new byte[] { 
0, 3, 15 }, new byte[] { 0, 15, 8 }, new byte[] { 0, 5, 15 }, + new byte[] { 0, 15, 3 }, new byte[] { 0, 15, 6 }, new byte[] { 0, 15, 6 }, new byte[] { 0, 15, 8 }, + new byte[] { 0, 3, 15 }, new byte[] { 0, 15, 3 }, new byte[] { 0, 5, 15 }, new byte[] { 0, 5, 15 }, + new byte[] { 0, 5, 15 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 5, 15 }, new byte[] { 0, 10, 15 }, + new byte[] { 0, 5, 15 }, new byte[] { 0, 10, 15 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 13, 15 }, + new byte[] { 0, 15, 3 }, new byte[] { 0, 12, 15 }, new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 } + } + }; + + public static readonly byte[][][] PartitionTable = new byte[3][][] + { + new byte[64][] + { + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 0 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 1 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 2 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 3 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 4 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 5 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 6 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 7 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 8 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 9 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 10 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 11 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 12 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 13 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 14 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 15 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 16 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 17 + new byte[16] { 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 18 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 19 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 20 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 21 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 22 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 23 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 24 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 25 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 26 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 27 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 28 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 29 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 30 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 31 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 32 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 33 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 34 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 35 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 36 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 37 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 38 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 39 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 40 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 41 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 42 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 43 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 44 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0 }, // 45 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 46 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 47 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 48 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 49 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 50 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 51 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 52 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 53 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 54 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 55 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 56 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 57 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 58 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 59 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 60 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 61 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 62 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } // 63 + }, + new byte[64][] + { + new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // 0 + new byte[16] { 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 }, // 1 + new byte[16] { 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 }, // 2 + new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // 3 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1 }, // 4 + new byte[16] { 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // 5 + new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // 6 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // 7 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 
0, 1, 1 }, // 8 + new byte[16] { 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // 9 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // 10 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 }, // 11 + new byte[16] { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // 12 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }, // 13 + new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // 14 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 }, // 15 + new byte[16] { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1 }, // 16 + new byte[16] { 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // 17 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0 }, // 18 + new byte[16] { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // 19 + new byte[16] { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // 20 + new byte[16] { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 }, // 21 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // 22 + new byte[16] { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1 }, // 23 + new byte[16] { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // 24 + new byte[16] { 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // 25 + new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0 }, // 26 + new byte[16] { 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0 }, // 27 + new byte[16] { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 }, // 28 + new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // 29 + new byte[16] { 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0 }, // 30 + new byte[16] { 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // 31 + new byte[16] { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, // 32 + new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 }, // 33 + new byte[16] { 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0 }, // 34 + new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0 }, // 35 + new 
byte[16] { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0 }, // 36 + new byte[16] { 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0 }, // 37 + new byte[16] { 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, // 38 + new byte[16] { 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1 }, // 39 + new byte[16] { 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 }, // 40 + new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 }, // 41 + new byte[16] { 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 }, // 42 + new byte[16] { 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 }, // 43 + new byte[16] { 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 }, // 44 + new byte[16] { 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 }, // 45 + new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1 }, // 46 + new byte[16] { 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, // 47 + new byte[16] { 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, // 48 + new byte[16] { 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 }, // 49 + new byte[16] { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0 }, // 50 + new byte[16] { 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0 }, // 51 + new byte[16] { 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1 }, // 52 + new byte[16] { 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // 53 + new byte[16] { 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // 54 + new byte[16] { 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0 }, // 55 + new byte[16] { 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // 56 + new byte[16] { 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1 }, // 57 + new byte[16] { 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1 }, // 58 + new byte[16] { 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1 }, // 59 + new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // 60 + new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // 61 + new byte[16] { 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0 }, // 62 + new byte[16] { 0, 1, 0, 0, 0, 1, 
0, 0, 0, 1, 1, 1, 0, 1, 1, 1 } // 63 + }, + new byte[64][] + { + new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2 }, // 0 + new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1 }, // 1 + new byte[16] { 0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // 2 + new byte[16] { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1 }, // 3 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2 }, // 4 + new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2 }, // 5 + new byte[16] { 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 }, // 6 + new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // 7 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 }, // 8 + new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2 }, // 9 + new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // 10 + new byte[16] { 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2 }, // 11 + new byte[16] { 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2 }, // 12 + new byte[16] { 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2 }, // 13 + new byte[16] { 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // 14 + new byte[16] { 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0 }, // 15 + new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2 }, // 16 + new byte[16] { 0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0 }, // 17 + new byte[16] { 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 }, // 18 + new byte[16] { 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1 }, // 19 + new byte[16] { 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2 }, // 20 + new byte[16] { 0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1 }, // 21 + new byte[16] { 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2 }, // 22 + new byte[16] { 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0 }, // 23 + new byte[16] { 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0 }, // 24 + new byte[16] { 0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2 }, // 25 + new byte[16] { 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 
2, 1, 0, 1, 1, 0 }, // 26 + new byte[16] { 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1 }, // 27 + new byte[16] { 0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2 }, // 28 + new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2 }, // 29 + new byte[16] { 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1 }, // 30 + new byte[16] { 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1 }, // 31 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // 32 + new byte[16] { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1 }, // 33 + new byte[16] { 0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2 }, // 34 + new byte[16] { 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0 }, // 35 + new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0 }, // 36 + new byte[16] { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 }, // 37 + new byte[16] { 0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0 }, // 38 + new byte[16] { 0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1 }, // 39 + new byte[16] { 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1 }, // 40 + new byte[16] { 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // 41 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1 }, // 42 + new byte[16] { 0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2 }, // 43 + new byte[16] { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1 }, // 44 + new byte[16] { 0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1 }, // 45 + new byte[16] { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1 }, // 46 + new byte[16] { 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1 }, // 47 + new byte[16] { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2 }, // 48 + new byte[16] { 0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1 }, // 49 + new byte[16] { 0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2 }, // 50 + new byte[16] { 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2 }, // 51 + new byte[16] { 0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2 }, // 52 + new byte[16] { 0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2 }, // 53 + 
new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2 }, // 54 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2 }, // 55 + new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2 }, // 56 + new byte[16] { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2 }, // 57 + new byte[16] { 0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2 }, // 58 + new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2 }, // 59 + new byte[16] { 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1 }, // 60 + new byte[16] { 0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2 }, // 61 + new byte[16] { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, // 62 + new byte[16] { 0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0 } // 63 + } + }; + } +} diff --git a/Ryujinx.Graphics.Texture/Utils/BC67Utils.cs b/Ryujinx.Graphics.Texture/Utils/BC67Utils.cs new file mode 100644 index 000000000..e6c3f6e76 --- /dev/null +++ b/Ryujinx.Graphics.Texture/Utils/BC67Utils.cs @@ -0,0 +1,1327 @@ +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace Ryujinx.Graphics.Texture.Utils +{ + static class BC67Utils + { + private static byte[][] _quantizationLut; + private static byte[][] _quantizationLutNoPBit; + + static BC67Utils() + { + _quantizationLut = new byte[5][]; + _quantizationLutNoPBit = new byte[5][]; + + for (int depth = 4; depth < 9; depth++) + { + byte[] lut = new byte[512]; + byte[] lutNoPBit = new byte[256]; + + for (int i = 0; i < lut.Length; i++) + { + lut[i] = QuantizeComponentForLut((byte)i, depth, i >> 8); + + if (i < lutNoPBit.Length) + { + lutNoPBit[i] = QuantizeComponentForLut((byte)i, depth); + } + } + + _quantizationLut[depth - 4] = lut; + _quantizationLutNoPBit[depth - 4] = lutNoPBit; + } + } + + public static (RgbaColor8, RgbaColor8) GetMinMaxColors(ReadOnlySpan tile, int w, int h) + { + if (Sse41.IsSupported && w == 4 && h == 4) + { + 
GetMinMaxColorsOneSubset4x4Sse41(tile, out RgbaColor8 minColor, out RgbaColor8 maxColor); + + return (minColor, maxColor); + } + else + { + RgbaColor8 minColor = new RgbaColor8(255, 255, 255, 255); + RgbaColor8 maxColor = default; + + for (int i = 0; i < tile.Length; i++) + { + RgbaColor8 color = RgbaColor8.FromUInt32(tile[i]); + + minColor.R = Math.Min(minColor.R, color.R); + minColor.G = Math.Min(minColor.G, color.G); + minColor.B = Math.Min(minColor.B, color.B); + minColor.A = Math.Min(minColor.A, color.A); + + maxColor.R = Math.Max(maxColor.R, color.R); + maxColor.G = Math.Max(maxColor.G, color.G); + maxColor.B = Math.Max(maxColor.B, color.B); + maxColor.A = Math.Max(maxColor.A, color.A); + } + + return (minColor, maxColor); + } + } + + public static void GetMinMaxColors( + ReadOnlySpan partitionTable, + ReadOnlySpan tile, + int w, + int h, + Span minColors, + Span maxColors, + int subsetCount) + { + if (Sse41.IsSupported && w == 4 && h == 4) + { + if (subsetCount == 1) + { + GetMinMaxColorsOneSubset4x4Sse41(tile, out minColors[0], out maxColors[0]); + return; + } + else if (subsetCount == 2) + { + GetMinMaxColorsTwoSubsets4x4Sse41(partitionTable, tile, minColors, maxColors); + return; + } + } + + minColors.Fill(new RgbaColor8(255, 255, 255, 255)); + + int i = 0; + for (int ty = 0; ty < h; ty++) + { + for (int tx = 0; tx < w; tx++) + { + int subset = partitionTable[ty * w + tx]; + RgbaColor8 color = RgbaColor8.FromUInt32(tile[i++]); + + minColors[subset].R = Math.Min(minColors[subset].R, color.R); + minColors[subset].G = Math.Min(minColors[subset].G, color.G); + minColors[subset].B = Math.Min(minColors[subset].B, color.B); + minColors[subset].A = Math.Min(minColors[subset].A, color.A); + + maxColors[subset].R = Math.Max(maxColors[subset].R, color.R); + maxColors[subset].G = Math.Max(maxColors[subset].G, color.G); + maxColors[subset].B = Math.Max(maxColors[subset].B, color.B); + maxColors[subset].A = Math.Max(maxColors[subset].A, color.A); + } + } + } + + 
private static unsafe void GetMinMaxColorsOneSubset4x4Sse41(ReadOnlySpan tile, out RgbaColor8 minColor, out RgbaColor8 maxColor) + { + Vector128 min = Vector128.AllBitsSet; + Vector128 max = Vector128.Zero; + Vector128 row0, row1, row2, row3; + + fixed (uint* pTile = tile) + { + row0 = Sse2.LoadVector128(pTile).AsByte(); + row1 = Sse2.LoadVector128(pTile + 4).AsByte(); + row2 = Sse2.LoadVector128(pTile + 8).AsByte(); + row3 = Sse2.LoadVector128(pTile + 12).AsByte(); + } + + min = Sse2.Min(min, row0); + max = Sse2.Max(max, row0); + min = Sse2.Min(min, row1); + max = Sse2.Max(max, row1); + min = Sse2.Min(min, row2); + max = Sse2.Max(max, row2); + min = Sse2.Min(min, row3); + max = Sse2.Max(max, row3); + + minColor = HorizontalMin(min); + maxColor = HorizontalMax(max); + } + + private static unsafe void GetMinMaxColorsTwoSubsets4x4Sse41( + ReadOnlySpan partitionTable, + ReadOnlySpan tile, + Span minColors, + Span maxColors) + { + Vector128 partitionMask; + + fixed (byte* pPartitionTable = partitionTable) + { + partitionMask = Sse2.LoadVector128(pPartitionTable); + } + + Vector128 subset0Mask = Sse2.CompareEqual(partitionMask, Vector128.Zero); + + Vector128 subset0MaskRep16Low = Sse2.UnpackLow(subset0Mask, subset0Mask); + Vector128 subset0MaskRep16High = Sse2.UnpackHigh(subset0Mask, subset0Mask); + + Vector128 subset0Mask0 = Sse2.UnpackLow(subset0MaskRep16Low.AsInt16(), subset0MaskRep16Low.AsInt16()).AsByte(); + Vector128 subset0Mask1 = Sse2.UnpackHigh(subset0MaskRep16Low.AsInt16(), subset0MaskRep16Low.AsInt16()).AsByte(); + Vector128 subset0Mask2 = Sse2.UnpackLow(subset0MaskRep16High.AsInt16(), subset0MaskRep16High.AsInt16()).AsByte(); + Vector128 subset0Mask3 = Sse2.UnpackHigh(subset0MaskRep16High.AsInt16(), subset0MaskRep16High.AsInt16()).AsByte(); + + Vector128 min0 = Vector128.AllBitsSet; + Vector128 min1 = Vector128.AllBitsSet; + Vector128 max0 = Vector128.Zero; + Vector128 max1 = Vector128.Zero; + + Vector128 row0, row1, row2, row3; + + fixed (uint* pTile = 
tile) + { + row0 = Sse2.LoadVector128(pTile).AsByte(); + row1 = Sse2.LoadVector128(pTile + 4).AsByte(); + row2 = Sse2.LoadVector128(pTile + 8).AsByte(); + row3 = Sse2.LoadVector128(pTile + 12).AsByte(); + } + + min0 = Sse2.Min(min0, Sse41.BlendVariable(min0, row0, subset0Mask0)); + min0 = Sse2.Min(min0, Sse41.BlendVariable(min0, row1, subset0Mask1)); + min0 = Sse2.Min(min0, Sse41.BlendVariable(min0, row2, subset0Mask2)); + min0 = Sse2.Min(min0, Sse41.BlendVariable(min0, row3, subset0Mask3)); + + min1 = Sse2.Min(min1, Sse2.Or(row0, subset0Mask0)); + min1 = Sse2.Min(min1, Sse2.Or(row1, subset0Mask1)); + min1 = Sse2.Min(min1, Sse2.Or(row2, subset0Mask2)); + min1 = Sse2.Min(min1, Sse2.Or(row3, subset0Mask3)); + + max0 = Sse2.Max(max0, Sse2.And(row0, subset0Mask0)); + max0 = Sse2.Max(max0, Sse2.And(row1, subset0Mask1)); + max0 = Sse2.Max(max0, Sse2.And(row2, subset0Mask2)); + max0 = Sse2.Max(max0, Sse2.And(row3, subset0Mask3)); + + max1 = Sse2.Max(max1, Sse2.AndNot(subset0Mask0, row0)); + max1 = Sse2.Max(max1, Sse2.AndNot(subset0Mask1, row1)); + max1 = Sse2.Max(max1, Sse2.AndNot(subset0Mask2, row2)); + max1 = Sse2.Max(max1, Sse2.AndNot(subset0Mask3, row3)); + + minColors[0] = HorizontalMin(min0); + minColors[1] = HorizontalMin(min1); + maxColors[0] = HorizontalMax(max0); + maxColors[1] = HorizontalMax(max1); + } + + private static RgbaColor8 HorizontalMin(Vector128 x) + { + x = Sse2.Min(x, Sse2.Shuffle(x.AsInt32(), 0x31).AsByte()); + x = Sse2.Min(x, Sse2.Shuffle(x.AsInt32(), 2).AsByte()); + return RgbaColor8.FromUInt32(x.AsUInt32().GetElement(0)); + } + + private static RgbaColor8 HorizontalMax(Vector128 x) + { + x = Sse2.Max(x, Sse2.Shuffle(x.AsInt32(), 0x31).AsByte()); + x = Sse2.Max(x, Sse2.Shuffle(x.AsInt32(), 2).AsByte()); + return RgbaColor8.FromUInt32(x.AsUInt32().GetElement(0)); + } + + public static int SelectIndices( + ReadOnlySpan values, + uint endPoint0, + uint endPoint1, + int pBit0, + int pBit1, + int indexBitCount, + int indexCount, + int colorDepth, + 
int alphaDepth, + uint alphaMask) + { + if (Sse41.IsSupported) + { + if (indexBitCount == 2) + { + return Select2BitIndicesSse41( + values, + endPoint0, + endPoint1, + pBit0, + pBit1, + indexBitCount, + indexCount, + colorDepth, + alphaDepth, + alphaMask); + } + else if (indexBitCount == 3) + { + return Select3BitIndicesSse41( + values, + endPoint0, + endPoint1, + pBit0, + pBit1, + indexBitCount, + indexCount, + colorDepth, + alphaDepth, + alphaMask); + } + else if (indexBitCount == 4) + { + return Select4BitIndicesOneSubsetSse41( + values, + endPoint0, + endPoint1, + pBit0, + pBit1, + indexBitCount, + indexCount, + colorDepth, + alphaDepth, + alphaMask); + } + } + + return SelectIndicesFallback( + values, + endPoint0, + endPoint1, + pBit0, + pBit1, + indexBitCount, + indexCount, + colorDepth, + alphaDepth, + alphaMask); + } + + private static unsafe int Select2BitIndicesSse41( + ReadOnlySpan values, + uint endPoint0, + uint endPoint1, + int pBit0, + int pBit1, + int indexBitCount, + int indexCount, + int colorDepth, + int alphaDepth, + uint alphaMask) + { + uint alphaMaskForPalette = alphaMask; + + if (alphaDepth == 0) + { + alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 255).ToUInt32(); + } + + int errorSum = 0; + + RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoint0), colorDepth, alphaDepth, pBit0); + RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoint1), colorDepth, alphaDepth, pBit1); + + Vector128 c0Rep = Vector128.Create(c0.ToUInt32() | alphaMaskForPalette).AsByte(); + Vector128 c1Rep = Vector128.Create(c1.ToUInt32() | alphaMaskForPalette).AsByte(); + + Vector128 c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); + + Vector128 rWeights; + Vector128 lWeights; + + fixed (byte* pWeights = BC67Tables.Weights[0], pInvWeights = BC67Tables.InverseWeights[0]) + { + rWeights = Sse2.LoadScalarVector128((uint*)pWeights).AsByte(); + lWeights = Sse2.LoadScalarVector128((uint*)pInvWeights).AsByte(); + } + + Vector128 iWeights = Sse2.UnpackLow(lWeights, rWeights); + Vector128 
iWeights01 = Sse2.UnpackLow(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); + Vector128 iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); + Vector128 iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); + + Vector128 pal0 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights0.AsSByte())); + Vector128 pal1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights1.AsSByte())); + + for (int i = 0; i < values.Length; i++) + { + uint c = values[i] | alphaMask; + + Vector128 color = Sse41.ConvertToVector128Int16(Vector128.Create(c).AsByte()); + + Vector128 delta0 = Sse2.Subtract(color, pal0); + Vector128 delta1 = Sse2.Subtract(color, pal1); + + Vector128 deltaSum0 = Sse2.MultiplyAddAdjacent(delta0, delta0); + Vector128 deltaSum1 = Sse2.MultiplyAddAdjacent(delta1, delta1); + + Vector128 deltaSum01 = Ssse3.HorizontalAdd(deltaSum0, deltaSum1); + + Vector128 delta = Sse41.PackUnsignedSaturate(deltaSum01, deltaSum01); + + Vector128 min = Sse41.MinHorizontal(delta); + + ushort error = min.GetElement(0); + + errorSum += error; + } + + return errorSum; + } + + private static unsafe int Select3BitIndicesSse41( + ReadOnlySpan values, + uint endPoint0, + uint endPoint1, + int pBit0, + int pBit1, + int indexBitCount, + int indexCount, + int colorDepth, + int alphaDepth, + uint alphaMask) + { + uint alphaMaskForPalette = alphaMask; + + if (alphaDepth == 0) + { + alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 255).ToUInt32(); + } + + int errorSum = 0; + + RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoint0), colorDepth, alphaDepth, pBit0); + RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoint1), colorDepth, alphaDepth, pBit1); + + Vector128 c0Rep = Vector128.Create(c0.ToUInt32() | alphaMaskForPalette).AsByte(); + Vector128 c1Rep = Vector128.Create(c1.ToUInt32() | alphaMaskForPalette).AsByte(); + + Vector128 c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); + + Vector128 rWeights; + Vector128 lWeights; + + fixed 
(byte* pWeights = BC67Tables.Weights[1], pInvWeights = BC67Tables.InverseWeights[1]) + { + rWeights = Sse2.LoadScalarVector128((ulong*)pWeights).AsByte(); + lWeights = Sse2.LoadScalarVector128((ulong*)pInvWeights).AsByte(); + } + + Vector128 iWeights = Sse2.UnpackLow(lWeights, rWeights); + Vector128 iWeights01 = Sse2.UnpackLow(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); + Vector128 iWeights23 = Sse2.UnpackHigh(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); + Vector128 iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); + Vector128 iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); + Vector128 iWeights2 = Sse2.UnpackLow(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); + Vector128 iWeights3 = Sse2.UnpackHigh(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); + + Vector128 pal0 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights0.AsSByte())); + Vector128 pal1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights1.AsSByte())); + Vector128 pal2 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights2.AsSByte())); + Vector128 pal3 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights3.AsSByte())); + + for (int i = 0; i < values.Length; i++) + { + uint c = values[i] | alphaMask; + + Vector128 color = Sse41.ConvertToVector128Int16(Vector128.Create(c).AsByte()); + + Vector128 delta0 = Sse2.Subtract(color, pal0); + Vector128 delta1 = Sse2.Subtract(color, pal1); + Vector128 delta2 = Sse2.Subtract(color, pal2); + Vector128 delta3 = Sse2.Subtract(color, pal3); + + Vector128 deltaSum0 = Sse2.MultiplyAddAdjacent(delta0, delta0); + Vector128 deltaSum1 = Sse2.MultiplyAddAdjacent(delta1, delta1); + Vector128 deltaSum2 = Sse2.MultiplyAddAdjacent(delta2, delta2); + Vector128 deltaSum3 = Sse2.MultiplyAddAdjacent(delta3, delta3); + + Vector128 deltaSum01 = Ssse3.HorizontalAdd(deltaSum0, deltaSum1); + Vector128 deltaSum23 = Ssse3.HorizontalAdd(deltaSum2, deltaSum3); + + 
Vector128 delta = Sse41.PackUnsignedSaturate(deltaSum01, deltaSum23); + + Vector128 min = Sse41.MinHorizontal(delta); + + ushort error = min.GetElement(0); + + errorSum += error; + } + + return errorSum; + } + + private static unsafe int Select4BitIndicesOneSubsetSse41( + ReadOnlySpan values, + uint endPoint0, + uint endPoint1, + int pBit0, + int pBit1, + int indexBitCount, + int indexCount, + int colorDepth, + int alphaDepth, + uint alphaMask) + { + uint alphaMaskForPalette = alphaMask; + + if (alphaDepth == 0) + { + alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 255).ToUInt32(); + } + + int errorSum = 0; + + RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoint0), colorDepth, alphaDepth, pBit0); + RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoint1), colorDepth, alphaDepth, pBit1); + + Vector128 c0Rep = Vector128.Create(c0.ToUInt32() | alphaMaskForPalette).AsByte(); + Vector128 c1Rep = Vector128.Create(c1.ToUInt32() | alphaMaskForPalette).AsByte(); + + Vector128 c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); + + Vector128 rWeights; + Vector128 lWeights; + + fixed (byte* pWeights = BC67Tables.Weights[2], pInvWeights = BC67Tables.InverseWeights[2]) + { + rWeights = Sse2.LoadVector128(pWeights); + lWeights = Sse2.LoadVector128(pInvWeights); + } + + Vector128 iWeightsLow = Sse2.UnpackLow(lWeights, rWeights); + Vector128 iWeightsHigh = Sse2.UnpackHigh(lWeights, rWeights); + Vector128 iWeights01 = Sse2.UnpackLow(iWeightsLow.AsInt16(), iWeightsLow.AsInt16()).AsByte(); + Vector128 iWeights23 = Sse2.UnpackHigh(iWeightsLow.AsInt16(), iWeightsLow.AsInt16()).AsByte(); + Vector128 iWeights45 = Sse2.UnpackLow(iWeightsHigh.AsInt16(), iWeightsHigh.AsInt16()).AsByte(); + Vector128 iWeights67 = Sse2.UnpackHigh(iWeightsHigh.AsInt16(), iWeightsHigh.AsInt16()).AsByte(); + Vector128 iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); + Vector128 iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); + Vector128 iWeights2 = 
Sse2.UnpackLow(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); + Vector128 iWeights3 = Sse2.UnpackHigh(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); + Vector128 iWeights4 = Sse2.UnpackLow(iWeights45.AsInt16(), iWeights45.AsInt16()).AsByte(); + Vector128 iWeights5 = Sse2.UnpackHigh(iWeights45.AsInt16(), iWeights45.AsInt16()).AsByte(); + Vector128 iWeights6 = Sse2.UnpackLow(iWeights67.AsInt16(), iWeights67.AsInt16()).AsByte(); + Vector128 iWeights7 = Sse2.UnpackHigh(iWeights67.AsInt16(), iWeights67.AsInt16()).AsByte(); + + Vector128 pal0 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights0.AsSByte())); + Vector128 pal1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights1.AsSByte())); + Vector128 pal2 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights2.AsSByte())); + Vector128 pal3 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights3.AsSByte())); + Vector128 pal4 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights4.AsSByte())); + Vector128 pal5 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights5.AsSByte())); + Vector128 pal6 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights6.AsSByte())); + Vector128 pal7 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights7.AsSByte())); + + for (int i = 0; i < values.Length; i++) + { + uint c = values[i] | alphaMask; + + Vector128 color = Sse41.ConvertToVector128Int16(Vector128.Create(c).AsByte()); + + Vector128 delta0 = Sse2.Subtract(color, pal0); + Vector128 delta1 = Sse2.Subtract(color, pal1); + Vector128 delta2 = Sse2.Subtract(color, pal2); + Vector128 delta3 = Sse2.Subtract(color, pal3); + Vector128 delta4 = Sse2.Subtract(color, pal4); + Vector128 delta5 = Sse2.Subtract(color, pal5); + Vector128 delta6 = Sse2.Subtract(color, pal6); + Vector128 delta7 = Sse2.Subtract(color, pal7); + + Vector128 deltaSum0 = Sse2.MultiplyAddAdjacent(delta0, delta0); + Vector128 deltaSum1 = Sse2.MultiplyAddAdjacent(delta1, delta1); + Vector128 
deltaSum2 = Sse2.MultiplyAddAdjacent(delta2, delta2); + Vector128 deltaSum3 = Sse2.MultiplyAddAdjacent(delta3, delta3); + Vector128 deltaSum4 = Sse2.MultiplyAddAdjacent(delta4, delta4); + Vector128 deltaSum5 = Sse2.MultiplyAddAdjacent(delta5, delta5); + Vector128 deltaSum6 = Sse2.MultiplyAddAdjacent(delta6, delta6); + Vector128 deltaSum7 = Sse2.MultiplyAddAdjacent(delta7, delta7); + + Vector128 deltaSum01 = Ssse3.HorizontalAdd(deltaSum0, deltaSum1); + Vector128 deltaSum23 = Ssse3.HorizontalAdd(deltaSum2, deltaSum3); + Vector128 deltaSum45 = Ssse3.HorizontalAdd(deltaSum4, deltaSum5); + Vector128 deltaSum67 = Ssse3.HorizontalAdd(deltaSum6, deltaSum7); + + Vector128 delta0123 = Sse41.PackUnsignedSaturate(deltaSum01, deltaSum23); + Vector128 delta4567 = Sse41.PackUnsignedSaturate(deltaSum45, deltaSum67); + + Vector128 min0123 = Sse41.MinHorizontal(delta0123); + Vector128 min4567 = Sse41.MinHorizontal(delta4567); + + ushort minPos0123 = min0123.GetElement(0); + ushort minPos4567 = min4567.GetElement(0); + + if (minPos4567 < minPos0123) + { + errorSum += minPos4567; + } + else + { + errorSum += minPos0123; + } + } + + return errorSum; + } + + private static int SelectIndicesFallback( + ReadOnlySpan values, + uint endPoint0, + uint endPoint1, + int pBit0, + int pBit1, + int indexBitCount, + int indexCount, + int colorDepth, + int alphaDepth, + uint alphaMask) + { + int errorSum = 0; + + uint alphaMaskForPalette = alphaMask; + + if (alphaDepth == 0) + { + alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 255).ToUInt32(); + } + + Span palette = stackalloc uint[indexCount]; + + RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoint0), colorDepth, alphaDepth, pBit0); + RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoint1), colorDepth, alphaDepth, pBit1); + + Unsafe.As(ref c0) |= alphaMaskForPalette; + Unsafe.As(ref c1) |= alphaMaskForPalette; + + palette[0] = c0.ToUInt32(); + palette[indexCount - 1] = c1.ToUInt32(); + + for (int j = 1; j < indexCount - 1; j++) + { + 
palette[j] = Interpolate(c0, c1, j, indexBitCount).ToUInt32(); + } + + for (int i = 0; i < values.Length; i++) + { + uint color = values[i] | alphaMask; + + int bestMatchScore = int.MaxValue; + int bestMatchIndex = 0; + + for (int j = 0; j < indexCount; j++) + { + int score = SquaredDifference( + RgbaColor8.FromUInt32(color).GetColor32(), + RgbaColor8.FromUInt32(palette[j]).GetColor32()); + + if (score < bestMatchScore) + { + bestMatchScore = score; + bestMatchIndex = j; + } + } + + errorSum += bestMatchScore; + } + + return errorSum; + } + + public static int SelectIndices( + ReadOnlySpan tile, + int w, + int h, + ReadOnlySpan endPoints0, + ReadOnlySpan endPoints1, + ReadOnlySpan pBitValues, + Span indices, + int subsetCount, + int partition, + int indexBitCount, + int indexCount, + int colorDepth, + int alphaDepth, + int pBits, + uint alphaMask) + { + if (Sse41.IsSupported) + { + if (indexBitCount == 2) + { + return Select2BitIndicesSse41( + tile, + w, + h, + endPoints0, + endPoints1, + pBitValues, + indices, + subsetCount, + partition, + colorDepth, + alphaDepth, + pBits, + alphaMask); + } + else if (indexBitCount == 3) + { + return Select3BitIndicesSse41( + tile, + w, + h, + endPoints0, + endPoints1, + pBitValues, + indices, + subsetCount, + partition, + colorDepth, + alphaDepth, + pBits, + alphaMask); + } + else if (indexBitCount == 4) + { + Debug.Assert(subsetCount == 1); + + return Select4BitIndicesOneSubsetSse41( + tile, + w, + h, + endPoints0[0], + endPoints1[0], + pBitValues, + indices, + partition, + colorDepth, + alphaDepth, + pBits, + alphaMask); + } + } + + return SelectIndicesFallback( + tile, + w, + h, + endPoints0, + endPoints1, + pBitValues, + indices, + subsetCount, + partition, + indexBitCount, + indexCount, + colorDepth, + alphaDepth, + pBits, + alphaMask); + } + + private static unsafe int Select2BitIndicesSse41( + ReadOnlySpan tile, + int w, + int h, + ReadOnlySpan endPoints0, + ReadOnlySpan endPoints1, + ReadOnlySpan pBitValues, + Span 
indices, + int subsetCount, + int partition, + int colorDepth, + int alphaDepth, + int pBits, + uint alphaMask) + { + byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; + + uint alphaMaskForPalette = alphaMask; + + if (alphaDepth == 0) + { + alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 255).ToUInt32(); + } + + int errorSum = 0; + + for (int subset = 0; subset < subsetCount; subset++) + { + int pBit0 = -1, pBit1 = -1; + + if (pBits == subsetCount) + { + pBit0 = pBit1 = pBitValues[subset]; + } + else if (pBits != 0) + { + pBit0 = pBitValues[subset * 2]; + pBit1 = pBitValues[subset * 2 + 1]; + } + + RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoints0[subset]), colorDepth, alphaDepth, pBit0); + RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoints1[subset]), colorDepth, alphaDepth, pBit1); + + Vector128 c0Rep = Vector128.Create(c0.ToUInt32() | alphaMaskForPalette).AsByte(); + Vector128 c1Rep = Vector128.Create(c1.ToUInt32() | alphaMaskForPalette).AsByte(); + + Vector128 c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); + + Vector128 rWeights; + Vector128 lWeights; + + fixed (byte* pWeights = BC67Tables.Weights[0], pInvWeights = BC67Tables.InverseWeights[0]) + { + rWeights = Sse2.LoadScalarVector128((uint*)pWeights).AsByte(); + lWeights = Sse2.LoadScalarVector128((uint*)pInvWeights).AsByte(); + } + + Vector128 iWeights = Sse2.UnpackLow(lWeights, rWeights); + Vector128 iWeights01 = Sse2.UnpackLow(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); + Vector128 iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); + Vector128 iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); + + Vector128 pal0 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights0.AsSByte())); + Vector128 pal1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights1.AsSByte())); + + int i = 0; + for (int ty = 0; ty < h; ty++) + { + for (int tx = 0; tx < w; tx++, i++) + { + int tileOffset = ty * 4 + tx; + if 
(partitionTable[tileOffset] != subset) + { + continue; + } + + uint c = tile[i] | alphaMask; + + Vector128 color = Sse41.ConvertToVector128Int16(Vector128.Create(c).AsByte()); + + Vector128 delta0 = Sse2.Subtract(color, pal0); + Vector128 delta1 = Sse2.Subtract(color, pal1); + + Vector128 deltaSum0 = Sse2.MultiplyAddAdjacent(delta0, delta0); + Vector128 deltaSum1 = Sse2.MultiplyAddAdjacent(delta1, delta1); + + Vector128 deltaSum01 = Ssse3.HorizontalAdd(deltaSum0, deltaSum1); + + Vector128 delta = Sse41.PackUnsignedSaturate(deltaSum01, deltaSum01); + + Vector128 min = Sse41.MinHorizontal(delta); + + uint minPos = min.AsUInt32().GetElement(0); + ushort error = (ushort)minPos; + uint index = minPos >> 16; + + indices[tileOffset] = (byte)index; + errorSum += error; + } + } + } + + return errorSum; + } + + private static unsafe int Select3BitIndicesSse41( + ReadOnlySpan tile, + int w, + int h, + ReadOnlySpan endPoints0, + ReadOnlySpan endPoints1, + ReadOnlySpan pBitValues, + Span indices, + int subsetCount, + int partition, + int colorDepth, + int alphaDepth, + int pBits, + uint alphaMask) + { + byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; + + uint alphaMaskForPalette = alphaMask; + + if (alphaDepth == 0) + { + alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 255).ToUInt32(); + } + + int errorSum = 0; + + for (int subset = 0; subset < subsetCount; subset++) + { + int pBit0 = -1, pBit1 = -1; + + if (pBits == subsetCount) + { + pBit0 = pBit1 = pBitValues[subset]; + } + else if (pBits != 0) + { + pBit0 = pBitValues[subset * 2]; + pBit1 = pBitValues[subset * 2 + 1]; + } + + RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoints0[subset]), colorDepth, alphaDepth, pBit0); + RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoints1[subset]), colorDepth, alphaDepth, pBit1); + + Vector128 c0Rep = Vector128.Create(c0.ToUInt32() | alphaMaskForPalette).AsByte(); + Vector128 c1Rep = Vector128.Create(c1.ToUInt32() | alphaMaskForPalette).AsByte(); + + 
Vector128 c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); + + Vector128 rWeights; + Vector128 lWeights; + + fixed (byte* pWeights = BC67Tables.Weights[1], pInvWeights = BC67Tables.InverseWeights[1]) + { + rWeights = Sse2.LoadScalarVector128((ulong*)pWeights).AsByte(); + lWeights = Sse2.LoadScalarVector128((ulong*)pInvWeights).AsByte(); + } + + Vector128 iWeights = Sse2.UnpackLow(lWeights, rWeights); + Vector128 iWeights01 = Sse2.UnpackLow(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); + Vector128 iWeights23 = Sse2.UnpackHigh(iWeights.AsInt16(), iWeights.AsInt16()).AsByte(); + Vector128 iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); + Vector128 iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); + Vector128 iWeights2 = Sse2.UnpackLow(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); + Vector128 iWeights3 = Sse2.UnpackHigh(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); + + Vector128 pal0 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights0.AsSByte())); + Vector128 pal1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights1.AsSByte())); + Vector128 pal2 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights2.AsSByte())); + Vector128 pal3 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights3.AsSByte())); + + int i = 0; + for (int ty = 0; ty < h; ty++) + { + for (int tx = 0; tx < w; tx++, i++) + { + int tileOffset = ty * 4 + tx; + if (partitionTable[tileOffset] != subset) + { + continue; + } + + uint c = tile[i] | alphaMask; + + Vector128 color = Sse41.ConvertToVector128Int16(Vector128.Create(c).AsByte()); + + Vector128 delta0 = Sse2.Subtract(color, pal0); + Vector128 delta1 = Sse2.Subtract(color, pal1); + Vector128 delta2 = Sse2.Subtract(color, pal2); + Vector128 delta3 = Sse2.Subtract(color, pal3); + + Vector128 deltaSum0 = Sse2.MultiplyAddAdjacent(delta0, delta0); + Vector128 deltaSum1 = Sse2.MultiplyAddAdjacent(delta1, delta1); + Vector128 deltaSum2 = 
Sse2.MultiplyAddAdjacent(delta2, delta2); + Vector128 deltaSum3 = Sse2.MultiplyAddAdjacent(delta3, delta3); + + Vector128 deltaSum01 = Ssse3.HorizontalAdd(deltaSum0, deltaSum1); + Vector128 deltaSum23 = Ssse3.HorizontalAdd(deltaSum2, deltaSum3); + + Vector128 delta = Sse41.PackUnsignedSaturate(deltaSum01, deltaSum23); + + Vector128 min = Sse41.MinHorizontal(delta); + + uint minPos = min.AsUInt32().GetElement(0); + ushort error = (ushort)minPos; + uint index = minPos >> 16; + + indices[tileOffset] = (byte)index; + errorSum += error; + } + } + } + + return errorSum; + } + + private static unsafe int Select4BitIndicesOneSubsetSse41( + ReadOnlySpan tile, + int w, + int h, + uint endPoint0, + uint endPoint1, + ReadOnlySpan pBitValues, + Span indices, + int partition, + int colorDepth, + int alphaDepth, + int pBits, + uint alphaMask) + { + uint alphaMaskForPalette = alphaMask; + + if (alphaDepth == 0) + { + alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 255).ToUInt32(); + } + + int errorSum = 0; + + int pBit0 = -1, pBit1 = -1; + + if (pBits != 0) + { + pBit0 = pBitValues[0]; + pBit1 = pBitValues[1]; + } + + RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoint0), colorDepth, alphaDepth, pBit0); + RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoint1), colorDepth, alphaDepth, pBit1); + + Vector128 c0Rep = Vector128.Create(c0.ToUInt32() | alphaMaskForPalette).AsByte(); + Vector128 c1Rep = Vector128.Create(c1.ToUInt32() | alphaMaskForPalette).AsByte(); + + Vector128 c0c1 = Sse2.UnpackLow(c0Rep, c1Rep); + + Vector128 rWeights; + Vector128 lWeights; + + fixed (byte* pWeights = BC67Tables.Weights[2], pInvWeights = BC67Tables.InverseWeights[2]) + { + rWeights = Sse2.LoadVector128(pWeights); + lWeights = Sse2.LoadVector128(pInvWeights); + } + + Vector128 iWeightsLow = Sse2.UnpackLow(lWeights, rWeights); + Vector128 iWeightsHigh = Sse2.UnpackHigh(lWeights, rWeights); + Vector128 iWeights01 = Sse2.UnpackLow(iWeightsLow.AsInt16(), iWeightsLow.AsInt16()).AsByte(); + Vector128 
iWeights23 = Sse2.UnpackHigh(iWeightsLow.AsInt16(), iWeightsLow.AsInt16()).AsByte(); + Vector128 iWeights45 = Sse2.UnpackLow(iWeightsHigh.AsInt16(), iWeightsHigh.AsInt16()).AsByte(); + Vector128 iWeights67 = Sse2.UnpackHigh(iWeightsHigh.AsInt16(), iWeightsHigh.AsInt16()).AsByte(); + Vector128 iWeights0 = Sse2.UnpackLow(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); + Vector128 iWeights1 = Sse2.UnpackHigh(iWeights01.AsInt16(), iWeights01.AsInt16()).AsByte(); + Vector128 iWeights2 = Sse2.UnpackLow(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); + Vector128 iWeights3 = Sse2.UnpackHigh(iWeights23.AsInt16(), iWeights23.AsInt16()).AsByte(); + Vector128 iWeights4 = Sse2.UnpackLow(iWeights45.AsInt16(), iWeights45.AsInt16()).AsByte(); + Vector128 iWeights5 = Sse2.UnpackHigh(iWeights45.AsInt16(), iWeights45.AsInt16()).AsByte(); + Vector128 iWeights6 = Sse2.UnpackLow(iWeights67.AsInt16(), iWeights67.AsInt16()).AsByte(); + Vector128 iWeights7 = Sse2.UnpackHigh(iWeights67.AsInt16(), iWeights67.AsInt16()).AsByte(); + + Vector128 pal0 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights0.AsSByte())); + Vector128 pal1 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights1.AsSByte())); + Vector128 pal2 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights2.AsSByte())); + Vector128 pal3 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights3.AsSByte())); + Vector128 pal4 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights4.AsSByte())); + Vector128 pal5 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights5.AsSByte())); + Vector128 pal6 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights6.AsSByte())); + Vector128 pal7 = ShiftRoundToNearest(Ssse3.MultiplyAddAdjacent(c0c1, iWeights7.AsSByte())); + + int i = 0; + for (int ty = 0; ty < h; ty++) + { + for (int tx = 0; tx < w; tx++, i++) + { + uint c = tile[i] | alphaMask; + + Vector128 color = Sse41.ConvertToVector128Int16(Vector128.Create(c).AsByte()); + + 
Vector128 delta0 = Sse2.Subtract(color, pal0); + Vector128 delta1 = Sse2.Subtract(color, pal1); + Vector128 delta2 = Sse2.Subtract(color, pal2); + Vector128 delta3 = Sse2.Subtract(color, pal3); + Vector128 delta4 = Sse2.Subtract(color, pal4); + Vector128 delta5 = Sse2.Subtract(color, pal5); + Vector128 delta6 = Sse2.Subtract(color, pal6); + Vector128 delta7 = Sse2.Subtract(color, pal7); + + Vector128 deltaSum0 = Sse2.MultiplyAddAdjacent(delta0, delta0); + Vector128 deltaSum1 = Sse2.MultiplyAddAdjacent(delta1, delta1); + Vector128 deltaSum2 = Sse2.MultiplyAddAdjacent(delta2, delta2); + Vector128 deltaSum3 = Sse2.MultiplyAddAdjacent(delta3, delta3); + Vector128 deltaSum4 = Sse2.MultiplyAddAdjacent(delta4, delta4); + Vector128 deltaSum5 = Sse2.MultiplyAddAdjacent(delta5, delta5); + Vector128 deltaSum6 = Sse2.MultiplyAddAdjacent(delta6, delta6); + Vector128 deltaSum7 = Sse2.MultiplyAddAdjacent(delta7, delta7); + + Vector128 deltaSum01 = Ssse3.HorizontalAdd(deltaSum0, deltaSum1); + Vector128 deltaSum23 = Ssse3.HorizontalAdd(deltaSum2, deltaSum3); + Vector128 deltaSum45 = Ssse3.HorizontalAdd(deltaSum4, deltaSum5); + Vector128 deltaSum67 = Ssse3.HorizontalAdd(deltaSum6, deltaSum7); + + Vector128 delta0123 = Sse41.PackUnsignedSaturate(deltaSum01, deltaSum23); + Vector128 delta4567 = Sse41.PackUnsignedSaturate(deltaSum45, deltaSum67); + + Vector128 min0123 = Sse41.MinHorizontal(delta0123); + Vector128 min4567 = Sse41.MinHorizontal(delta4567); + + uint minPos0123 = min0123.AsUInt32().GetElement(0); + uint minPos4567 = min4567.AsUInt32().GetElement(0); + + if ((ushort)minPos4567 < (ushort)minPos0123) + { + errorSum += (ushort)minPos4567; + indices[ty * 4 + tx] = (byte)(8 + (minPos4567 >> 16)); + } + else + { + errorSum += (ushort)minPos0123; + indices[ty * 4 + tx] = (byte)(minPos0123 >> 16); + } + } + } + + return errorSum; + } + + private static Vector128 ShiftRoundToNearest(Vector128 x) + { + return Sse2.ShiftRightLogical(Sse2.Add(x, Vector128.Create((short)32)), 6); + } + 
+ private static int SelectIndicesFallback( + ReadOnlySpan tile, + int w, + int h, + ReadOnlySpan endPoints0, + ReadOnlySpan endPoints1, + ReadOnlySpan pBitValues, + Span indices, + int subsetCount, + int partition, + int indexBitCount, + int indexCount, + int colorDepth, + int alphaDepth, + int pBits, + uint alphaMask) + { + int errorSum = 0; + + uint alphaMaskForPalette = alphaMask; + + if (alphaDepth == 0) + { + alphaMaskForPalette |= new RgbaColor8(0, 0, 0, 255).ToUInt32(); + } + + Span palette = stackalloc uint[subsetCount * indexCount]; + + for (int subset = 0; subset < subsetCount; subset++) + { + int palBase = subset * indexCount; + + int pBit0 = -1, pBit1 = -1; + + if (pBits == subsetCount) + { + pBit0 = pBit1 = pBitValues[subset]; + } + else if (pBits != 0) + { + pBit0 = pBitValues[subset * 2]; + pBit1 = pBitValues[subset * 2 + 1]; + } + + RgbaColor8 c0 = Quantize(RgbaColor8.FromUInt32(endPoints0[subset]), colorDepth, alphaDepth, pBit0); + RgbaColor8 c1 = Quantize(RgbaColor8.FromUInt32(endPoints1[subset]), colorDepth, alphaDepth, pBit1); + + Unsafe.As(ref c0) |= alphaMaskForPalette; + Unsafe.As(ref c1) |= alphaMaskForPalette; + + palette[palBase + 0] = c0.ToUInt32(); + palette[palBase + indexCount - 1] = c1.ToUInt32(); + + for (int j = 1; j < indexCount - 1; j++) + { + palette[palBase + j] = Interpolate(c0, c1, j, indexBitCount).ToUInt32(); + } + } + + int i = 0; + for (int ty = 0; ty < h; ty++) + { + for (int tx = 0; tx < w; tx++) + { + int subset = BC67Tables.PartitionTable[subsetCount - 1][partition][ty * 4 + tx]; + uint color = tile[i++] | alphaMask; + + int bestMatchScore = int.MaxValue; + int bestMatchIndex = 0; + + for (int j = 0; j < indexCount; j++) + { + int score = SquaredDifference( + RgbaColor8.FromUInt32(color).GetColor32(), + RgbaColor8.FromUInt32(palette[subset * indexCount + j]).GetColor32()); + + if (score < bestMatchScore) + { + bestMatchScore = score; + bestMatchIndex = j; + } + } + + indices[ty * 4 + tx] = (byte)bestMatchIndex; + 
errorSum += bestMatchScore; + } + } + + return errorSum; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int SquaredDifference(RgbaColor32 color1, RgbaColor32 color2) + { + RgbaColor32 delta = color1 - color2; + return RgbaColor32.Dot(delta, delta); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static RgbaColor8 Interpolate(RgbaColor8 color1, RgbaColor8 color2, int weightIndex, int indexBitCount) + { + return Interpolate(color1.GetColor32(), color2.GetColor32(), weightIndex, indexBitCount).GetColor8(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static RgbaColor32 Interpolate(RgbaColor32 color1, RgbaColor32 color2, int weightIndex, int indexBitCount) + { + Debug.Assert(indexBitCount >= 2 && indexBitCount <= 4); + + int weight = (((weightIndex << 7) / ((1 << indexBitCount) - 1)) + 1) >> 1; + + RgbaColor32 weightV = new RgbaColor32(weight); + RgbaColor32 invWeightV = new RgbaColor32(64 - weight); + + return (color1 * invWeightV + color2 * weightV + new RgbaColor32(32)) >> 6; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static RgbaColor32 Interpolate( + RgbaColor32 color1, + RgbaColor32 color2, + int colorWeightIndex, + int alphaWeightIndex, + int colorIndexBitCount, + int alphaIndexBitCount) + { + Debug.Assert(colorIndexBitCount >= 2 && colorIndexBitCount <= 4); + Debug.Assert(alphaIndexBitCount >= 2 && alphaIndexBitCount <= 4); + + int colorWeight = BC67Tables.Weights[colorIndexBitCount - 2][colorWeightIndex]; + int alphaWeight = BC67Tables.Weights[alphaIndexBitCount - 2][alphaWeightIndex]; + + RgbaColor32 weightV = new RgbaColor32(colorWeight); + weightV.A = alphaWeight; + RgbaColor32 invWeightV = new RgbaColor32(64) - weightV; + + return (color1 * invWeightV + color2 * weightV + new RgbaColor32(32)) >> 6; + } + + public static RgbaColor8 Quantize(RgbaColor8 color, int colorBits, int alphaBits, int pBit = -1) + { + if (alphaBits == 0) + { + int colorShift = 8 - 
colorBits; + + uint c; + + if (pBit >= 0) + { + byte[] lutColor = _quantizationLut[colorBits - 4]; + + Debug.Assert(pBit <= 1); + int high = pBit << 8; + uint mask = (0xffu >> (colorBits + 1)) * 0x10101; + + c = lutColor[color.R | high]; + c |= (uint)lutColor[color.G | high] << 8; + c |= (uint)lutColor[color.B | high] << 16; + + c <<= colorShift; + c |= (c >> (colorBits + 1)) & mask; + c |= ((uint)pBit * 0x10101) << (colorShift - 1); + } + else + { + byte[] lutColor = _quantizationLutNoPBit[colorBits - 4]; + + uint mask = (0xffu >> colorBits) * 0x10101; + + c = lutColor[color.R]; + c |= (uint)lutColor[color.G] << 8; + c |= (uint)lutColor[color.B] << 16; + + c <<= colorShift; + c |= (c >> colorBits) & mask; + } + + c |= (uint)color.A << 24; + + return RgbaColor8.FromUInt32(c); + } + + return QuantizeFallback(color, colorBits, alphaBits, pBit); + } + + private static RgbaColor8 QuantizeFallback(RgbaColor8 color, int colorBits, int alphaBits, int pBit = -1) + { + byte r = UnquantizeComponent(QuantizeComponent(color.R, colorBits, pBit), colorBits, pBit); + byte g = UnquantizeComponent(QuantizeComponent(color.G, colorBits, pBit), colorBits, pBit); + byte b = UnquantizeComponent(QuantizeComponent(color.B, colorBits, pBit), colorBits, pBit); + byte a = alphaBits == 0 ? color.A : UnquantizeComponent(QuantizeComponent(color.A, alphaBits, pBit), alphaBits, pBit); + return new RgbaColor8(r, g, b, a); + } + + public static byte QuantizeComponent(byte component, int bits, int pBit = -1) + { + return pBit >= 0 ? 
_quantizationLut[bits - 4][component | (pBit << 8)] : _quantizationLutNoPBit[bits - 4][component]; + } + + private static byte QuantizeComponentForLut(byte component, int bits, int pBit = -1) + { + int shift = 8 - bits; + int fill = component >> bits; + + if (pBit >= 0) + { + Debug.Assert(pBit <= 1); + fill >>= 1; + fill |= pBit << (shift - 1); + } + + int q1 = component >> shift; + int q2 = Math.Max(q1 - 1, 0); + int q3 = Math.Min(q1 + 1, (1 << bits) - 1); + + int delta1 = FastAbs(((q1 << shift) | fill) - component); + int delta2 = component - ((q2 << shift) | fill); + int delta3 = ((q3 << shift) | fill) - component; + + if (delta1 < delta2 && delta1 < delta3) + { + return (byte)q1; + } + else if (delta2 < delta3) + { + return (byte)q2; + } + else + { + return (byte)q3; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int FastAbs(int x) + { + int sign = x >> 31; + return (x + sign) ^ sign; + } + + private static byte UnquantizeComponent(byte component, int bits, int pBit) + { + int shift = 8 - bits; + int value = component << shift; + + if (pBit >= 0) + { + Debug.Assert(pBit <= 1); + value |= value >> (bits + 1); + value |= pBit << (shift - 1); + } + else + { + value |= value >> bits; + } + + return (byte)value; + } + } +} diff --git a/Ryujinx.Graphics.Texture/Utils/BC7ModeInfo.cs b/Ryujinx.Graphics.Texture/Utils/BC7ModeInfo.cs new file mode 100644 index 000000000..749324bf0 --- /dev/null +++ b/Ryujinx.Graphics.Texture/Utils/BC7ModeInfo.cs @@ -0,0 +1,37 @@ +namespace Ryujinx.Graphics.Texture.Utils +{ + struct BC7ModeInfo + { + public readonly int SubsetCount; + public readonly int PartitionBitCount; + public readonly int PBits; + public readonly int RotationBitCount; + public readonly int IndexModeBitCount; + public readonly int ColorIndexBitCount; + public readonly int AlphaIndexBitCount; + public readonly int ColorDepth; + public readonly int AlphaDepth; + + public BC7ModeInfo( + int subsetCount, + int partitionBitsCount, + int 
pBits, + int rotationBitCount, + int indexModeBitCount, + int colorIndexBitCount, + int alphaIndexBitCount, + int colorDepth, + int alphaDepth) + { + SubsetCount = subsetCount; + PartitionBitCount = partitionBitsCount; + PBits = pBits; + RotationBitCount = rotationBitCount; + IndexModeBitCount = indexModeBitCount; + ColorIndexBitCount = colorIndexBitCount; + AlphaIndexBitCount = alphaIndexBitCount; + ColorDepth = colorDepth; + AlphaDepth = alphaDepth; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Texture/Utils/Block.cs b/Ryujinx.Graphics.Texture/Utils/Block.cs new file mode 100644 index 000000000..a8bae077d --- /dev/null +++ b/Ryujinx.Graphics.Texture/Utils/Block.cs @@ -0,0 +1,55 @@ +namespace Ryujinx.Graphics.Texture.Utils +{ + struct Block + { + public ulong Low; + public ulong High; + + public void Encode(ulong value, ref int offset, int bits) + { + if (offset >= 64) + { + High |= value << (offset - 64); + } + else + { + Low |= value << offset; + + if (offset + bits > 64) + { + int remainder = 64 - offset; + High |= value >> remainder; + } + } + + offset += bits; + } + + public ulong Decode(ref int offset, int bits) + { + ulong value; + ulong mask = bits == 64 ? 
ulong.MaxValue : (1UL << bits) - 1; + + if (offset >= 64) + { + value = (High >> (offset - 64)) & mask; + } + else + { + value = Low >> offset; + + if (offset + bits > 64) + { + int remainder = 64 - offset; + value |= High << remainder; + } + + value &= mask; + } + + offset += bits; + + return value; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Texture/Utils/RgbaColor32.cs b/Ryujinx.Graphics.Texture/Utils/RgbaColor32.cs new file mode 100644 index 000000000..412493274 --- /dev/null +++ b/Ryujinx.Graphics.Texture/Utils/RgbaColor32.cs @@ -0,0 +1,225 @@ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace Ryujinx.Graphics.Texture.Utils +{ + struct RgbaColor32 : IEquatable + { + private Vector128 _color; + + public int R + { + get => _color.GetElement(0); + set => _color = _color.WithElement(0, value); + } + + public int G + { + get => _color.GetElement(1); + set => _color = _color.WithElement(1, value); + } + + public int B + { + get => _color.GetElement(2); + set => _color = _color.WithElement(2, value); + } + + public int A + { + get => _color.GetElement(3); + set => _color = _color.WithElement(3, value); + } + + public RgbaColor32(Vector128 color) + { + _color = color; + } + + public RgbaColor32(int r, int g, int b, int a) + { + _color = Vector128.Create(r, g, b, a); + } + + public RgbaColor32(int scalar) + { + _color = Vector128.Create(scalar); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static RgbaColor32 operator +(RgbaColor32 x, RgbaColor32 y) + { + if (Sse2.IsSupported) + { + return new RgbaColor32(Sse2.Add(x._color, y._color)); + } + else + { + return new RgbaColor32(x.R + y.R, x.G + y.G, x.B + y.B, x.A + y.A); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static RgbaColor32 operator -(RgbaColor32 x, RgbaColor32 y) + { + if (Sse2.IsSupported) + { + return new RgbaColor32(Sse2.Subtract(x._color, 
y._color)); + } + else + { + return new RgbaColor32(x.R - y.R, x.G - y.G, x.B - y.B, x.A - y.A); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static RgbaColor32 operator *(RgbaColor32 x, RgbaColor32 y) + { + if (Sse41.IsSupported) + { + return new RgbaColor32(Sse41.MultiplyLow(x._color, y._color)); + } + else + { + return new RgbaColor32(x.R * y.R, x.G * y.G, x.B * y.B, x.A * y.A); + } + } + + public static RgbaColor32 operator /(RgbaColor32 x, RgbaColor32 y) + { + return new RgbaColor32(x.R / y.R, x.G / y.G, x.B / y.B, x.A / y.A); + } + + public static RgbaColor32 DivideGuarded(RgbaColor32 x, RgbaColor32 y, int resultIfZero) + { + int DivideGuarded(int dividend, int divisor) + { + if (divisor == 0) + { + return resultIfZero; + } + + return dividend / divisor; + } + + return new RgbaColor32(DivideGuarded(x.R, y.R), DivideGuarded(x.G, y.G), DivideGuarded(x.B, y.B), DivideGuarded(x.A, y.A)); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static RgbaColor32 operator <<(RgbaColor32 x, int shift) + { + if (Sse2.IsSupported) + { + return new RgbaColor32(Sse2.ShiftLeftLogical(x._color, (byte)shift)); + } + else + { + return new RgbaColor32(x.R << shift, x.G << shift, x.B << shift, x.A << shift); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static RgbaColor32 operator >>(RgbaColor32 x, int shift) + { + if (Sse2.IsSupported) + { + return new RgbaColor32(Sse2.ShiftRightLogical(x._color, (byte)shift)); + } + else + { + return new RgbaColor32(x.R >> shift, x.G >> shift, x.B >> shift, x.A >> shift); + } + } + + public static bool operator ==(RgbaColor32 x, RgbaColor32 y) + { + return x.Equals(y); + } + + public static bool operator !=(RgbaColor32 x, RgbaColor32 y) + { + return !x.Equals(y); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Dot(RgbaColor32 x, RgbaColor32 y) + { + if (Sse41.IsSupported) + { + Vector128 product = Sse41.MultiplyLow(x._color, y._color); + 
Vector128 sum = Ssse3.HorizontalAdd(product, product); + sum = Ssse3.HorizontalAdd(sum, sum); + return sum.GetElement(0); + } + else + { + return x.R * y.R + x.G * y.G + x.B * y.B + x.A * y.A; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static RgbaColor32 Max(RgbaColor32 x, RgbaColor32 y) + { + if (Sse41.IsSupported) + { + return new RgbaColor32(Sse41.Max(x._color, y._color)); + } + else + { + return new RgbaColor32(Math.Max(x.R, y.R), Math.Max(x.G, y.G), Math.Max(x.B, y.B), Math.Max(x.A, y.A)); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static RgbaColor32 Min(RgbaColor32 x, RgbaColor32 y) + { + if (Sse41.IsSupported) + { + return new RgbaColor32(Sse41.Min(x._color, y._color)); + } + else + { + return new RgbaColor32(Math.Min(x.R, y.R), Math.Min(x.G, y.G), Math.Min(x.B, y.B), Math.Min(x.A, y.A)); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public RgbaColor8 GetColor8() + { + if (Sse41.IsSupported) + { + Vector128 temp = _color; + Vector128 color16 = Sse41.PackUnsignedSaturate(temp, temp); + Vector128 color8 = Sse2.PackUnsignedSaturate(color16.AsInt16(), color16.AsInt16()); + uint color = color8.AsUInt32().GetElement(0); + return Unsafe.As(ref color); + } + else + { + return new RgbaColor8(ClampByte(R), ClampByte(G), ClampByte(B), ClampByte(A)); + } + } + + private static byte ClampByte(int value) + { + return (byte)Math.Clamp(value, 0, 255); + } + + public override int GetHashCode() + { + return HashCode.Combine(R, G, B, A); + } + + public override bool Equals(object? 
obj) + { + return obj is RgbaColor32 other && Equals(other); + } + + public bool Equals(RgbaColor32 other) + { + return _color.Equals(other._color); + } + } +} diff --git a/Ryujinx.Graphics.Texture/Utils/RgbaColor8.cs b/Ryujinx.Graphics.Texture/Utils/RgbaColor8.cs new file mode 100644 index 000000000..5f7dfb4b4 --- /dev/null +++ b/Ryujinx.Graphics.Texture/Utils/RgbaColor8.cs @@ -0,0 +1,83 @@ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace Ryujinx.Graphics.Texture.Utils +{ + struct RgbaColor8 : IEquatable + { + public byte R; + public byte G; + public byte B; + public byte A; + + public RgbaColor8(byte r, byte g, byte b, byte a) + { + R = r; + G = g; + B = b; + A = a; + } + + public static RgbaColor8 FromUInt32(uint color) + { + return Unsafe.As(ref color); + } + + public static bool operator ==(RgbaColor8 x, RgbaColor8 y) + { + return x.Equals(y); + } + + public static bool operator !=(RgbaColor8 x, RgbaColor8 y) + { + return !x.Equals(y); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public RgbaColor32 GetColor32() + { + if (Sse41.IsSupported) + { + Vector128 color = Vector128.CreateScalarUnsafe(Unsafe.As(ref this)).AsByte(); + return new RgbaColor32(Sse41.ConvertToVector128Int32(color)); + } + else + { + return new RgbaColor32(R, G, B, A); + } + } + + public uint ToUInt32() + { + return Unsafe.As(ref this); + } + + public override int GetHashCode() + { + return HashCode.Combine(R, G, B, A); + } + + public override bool Equals(object obj) + { + return obj is RgbaColor8 other && Equals(other); + } + + public bool Equals(RgbaColor8 other) + { + return R == other.R && G == other.G && B == other.B && A == other.A; + } + + public byte GetComponent(int index) + { + return index switch + { + 1 => G, + 2 => B, + 3 => A, + _ => R + }; + } + } +} diff --git a/Ryujinx.Graphics.Vic/Blender.cs b/Ryujinx.Graphics.Vic/Blender.cs index 92b641d63..b6ca35ae8 100644 --- 
a/Ryujinx.Graphics.Vic/Blender.cs +++ b/Ryujinx.Graphics.Vic/Blender.cs @@ -10,17 +10,22 @@ namespace Ryujinx.Graphics.Vic { static class Blender { - public static void BlendOne(Surface dst, Surface src, ref SlotStruct slot) + public static void BlendOne(Surface dst, Surface src, ref SlotStruct slot, Rectangle targetRect) { - if (Sse41.IsSupported && (dst.Width & 3) == 0) + int x1 = targetRect.X; + int y1 = targetRect.Y; + int x2 = Math.Min(src.Width, x1 + targetRect.Width); + int y2 = Math.Min(src.Height, y1 + targetRect.Height); + + if (Sse41.IsSupported && ((x1 | x2) & 3) == 0) { - BlendOneSse41(dst, src, ref slot); + BlendOneSse41(dst, src, ref slot, x1, y1, x2, y2); return; } - for (int y = 0; y < dst.Height; y++) + for (int y = y1; y < y2; y++) { - for (int x = 0; x < dst.Width; x++) + for (int x = x1; x < x2; x++) { int inR = src.GetR(x, y); int inG = src.GetG(x, y); @@ -40,9 +45,9 @@ namespace Ryujinx.Graphics.Vic } } - private unsafe static void BlendOneSse41(Surface dst, Surface src, ref SlotStruct slot) + private unsafe static void BlendOneSse41(Surface dst, Surface src, ref SlotStruct slot, int x1, int y1, int x2, int y2) { - Debug.Assert((dst.Width & 3) == 0); + Debug.Assert(((x1 | x2) & 3) == 0); ref MatrixStruct mtx = ref slot.ColorMatrixStruct; @@ -62,9 +67,9 @@ namespace Ryujinx.Graphics.Vic Pixel* ip = srcPtr; Pixel* op = dstPtr; - for (int y = 0; y < dst.Height; y++, ip += src.Width, op += dst.Width) + for (int y = y1; y < y2; y++, ip += src.Width, op += dst.Width) { - for (int x = 0; x < dst.Width; x += 4) + for (int x = x1; x < x2; x += 4) { Vector128 pixel1 = Sse41.ConvertToVector128Int32((ushort*)(ip + (uint)x)); Vector128 pixel2 = Sse41.ConvertToVector128Int32((ushort*)(ip + (uint)x + 1)); diff --git a/Ryujinx.Graphics.Vic/Rectangle.cs b/Ryujinx.Graphics.Vic/Rectangle.cs new file mode 100644 index 000000000..2a13b95c7 --- /dev/null +++ b/Ryujinx.Graphics.Vic/Rectangle.cs @@ -0,0 +1,18 @@ +namespace Ryujinx.Graphics.Vic +{ + struct Rectangle 
+ { + public readonly int X; + public readonly int Y; + public readonly int Width; + public readonly int Height; + + public Rectangle(int x, int y, int width, int height) + { + X = x; + Y = y; + Width = width; + Height = height; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Vic/VicDevice.cs b/Ryujinx.Graphics.Vic/VicDevice.cs index 537b8ba47..8b66727df 100644 --- a/Ryujinx.Graphics.Vic/VicDevice.cs +++ b/Ryujinx.Graphics.Vic/VicDevice.cs @@ -2,6 +2,7 @@ using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Vic.Image; using Ryujinx.Graphics.Vic.Types; +using System; using System.Collections.Generic; namespace Ryujinx.Graphics.Vic @@ -47,7 +48,19 @@ namespace Ryujinx.Graphics.Vic using Surface src = SurfaceReader.Read(_rm, ref slot.SlotConfig, ref slot.SlotSurfaceConfig, ref offsets); - Blender.BlendOne(output, src, ref slot); + int x1 = config.OutputConfig.TargetRectLeft; + int y1 = config.OutputConfig.TargetRectTop; + int x2 = config.OutputConfig.TargetRectRight + 1; + int y2 = config.OutputConfig.TargetRectBottom + 1; + + int targetX = Math.Min(x1, x2); + int targetY = Math.Min(y1, y2); + int targetW = Math.Min(output.Width - targetX, Math.Abs(x2 - x1)); + int targetH = Math.Min(output.Height - targetY, Math.Abs(y2 - y1)); + + Rectangle targetRect = new Rectangle(targetX, targetY, targetW, targetH); + + Blender.BlendOne(output, src, ref slot, targetRect); } SurfaceWriter.Write(_rm, output, ref config.OutputSurfaceConfig, ref _state.State.SetOutputSurface); diff --git a/Ryujinx.Graphics.Vulkan/Auto.cs b/Ryujinx.Graphics.Vulkan/Auto.cs new file mode 100644 index 000000000..e5f64df8e --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/Auto.cs @@ -0,0 +1,152 @@ +using System; +using System.Diagnostics; +using System.Threading; + +namespace Ryujinx.Graphics.Vulkan +{ + interface IAuto + { + void IncrementReferenceCount(); + void DecrementReferenceCount(int cbIndex); + void DecrementReferenceCount(); + } + + interface IAutoPrivate : IAuto + { + void 
AddCommandBufferDependencies(CommandBufferScoped cbs); + } + + class Auto : IAutoPrivate, IDisposable where T : IDisposable + { + private int _referenceCount; + private T _value; + + private readonly BitMap _cbOwnership; + private readonly MultiFenceHolder _waitable; + private readonly IAutoPrivate[] _referencedObjs; + + private bool _disposed; + private bool _destroyed; + + public Auto(T value) + { + _referenceCount = 1; + _value = value; + _cbOwnership = new BitMap(CommandBufferPool.MaxCommandBuffers); + } + + public Auto(T value, MultiFenceHolder waitable, params IAutoPrivate[] referencedObjs) : this(value) + { + _waitable = waitable; + _referencedObjs = referencedObjs; + + for (int i = 0; i < referencedObjs.Length; i++) + { + referencedObjs[i].IncrementReferenceCount(); + } + } + + public T Get(CommandBufferScoped cbs, int offset, int size) + { + _waitable?.AddBufferUse(cbs.CommandBufferIndex, offset, size); + return Get(cbs); + } + + public T GetUnsafe() + { + return _value; + } + + public T Get(CommandBufferScoped cbs) + { + if (!_destroyed) + { + AddCommandBufferDependencies(cbs); + } + + return _value; + } + + public bool HasCommandBufferDependency(CommandBufferScoped cbs) + { + return _cbOwnership.IsSet(cbs.CommandBufferIndex); + } + + public bool HasRentedCommandBufferDependency(CommandBufferPool cbp) + { + return _cbOwnership.AnySet(); + } + + public void AddCommandBufferDependencies(CommandBufferScoped cbs) + { + // We don't want to add a reference to this object to the command buffer + // more than once, so if we detect that the command buffer already has ownership + // of this object, then we can just return without doing anything else. + if (_cbOwnership.Set(cbs.CommandBufferIndex)) + { + if (_waitable != null) + { + cbs.AddWaitable(_waitable); + } + + cbs.AddDependant(this); + + // We need to add a dependency on the command buffer to all objects this object + // references aswell. 
+ if (_referencedObjs != null) + { + for (int i = 0; i < _referencedObjs.Length; i++) + { + _referencedObjs[i].AddCommandBufferDependencies(cbs); + } + } + } + } + + public void IncrementReferenceCount() + { + if (Interlocked.Increment(ref _referenceCount) == 1) + { + Interlocked.Decrement(ref _referenceCount); + throw new Exception("Attempted to inc ref of dead object."); + } + } + + public void DecrementReferenceCount(int cbIndex) + { + _cbOwnership.Clear(cbIndex); + DecrementReferenceCount(); + } + + public void DecrementReferenceCount() + { + if (Interlocked.Decrement(ref _referenceCount) == 0) + { + _value.Dispose(); + _value = default; + _destroyed = true; + + // Value is no longer in use by the GPU, dispose all other + // resources that it references. + if (_referencedObjs != null) + { + for (int i = 0; i < _referencedObjs.Length; i++) + { + _referencedObjs[i].DecrementReferenceCount(); + } + } + } + + Debug.Assert(_referenceCount >= 0); + } + + public void Dispose() + { + if (!_disposed) + { + DecrementReferenceCount(); + _disposed = true; + } + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/BackgroundResources.cs b/Ryujinx.Graphics.Vulkan/BackgroundResources.cs new file mode 100644 index 000000000..e60e48451 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/BackgroundResources.cs @@ -0,0 +1,114 @@ +using System.Threading; +using System.Collections.Generic; +using System; +using Silk.NET.Vulkan; + +namespace Ryujinx.Graphics.Vulkan +{ + class BackgroundResource : IDisposable + { + private VulkanGraphicsDevice _gd; + private Device _device; + + private CommandBufferPool _pool; + private PersistentFlushBuffer _flushBuffer; + + public BackgroundResource(VulkanGraphicsDevice gd, Device device) + { + _gd = gd; + _device = device; + } + + public CommandBufferPool GetPool() + { + if (_pool == null) + { + bool useBackground = _gd.BackgroundQueue.Handle != 0 && _gd.Vendor != Vendor.Amd; + Queue queue = useBackground ? 
_gd.BackgroundQueue : _gd.Queue; + object queueLock = useBackground ? _gd.BackgroundQueueLock : _gd.QueueLock; + + lock (queueLock) + { + _pool = new CommandBufferPool(_gd.Api, _device, queue, queueLock, _gd.QueueFamilyIndex, isLight: true); + } + } + + return _pool; + } + + public PersistentFlushBuffer GetFlushBuffer() + { + if (_flushBuffer == null) + { + _flushBuffer = new PersistentFlushBuffer(_gd); + } + + return _flushBuffer; + } + + public void Dispose() + { + _pool?.Dispose(); + _flushBuffer?.Dispose(); + } + } + + class BackgroundResources : IDisposable + { + private VulkanGraphicsDevice _gd; + private Device _device; + + private Dictionary _resources; + + public BackgroundResources(VulkanGraphicsDevice gd, Device device) + { + _gd = gd; + _device = device; + + _resources = new Dictionary(); + } + + private void Cleanup() + { + foreach (KeyValuePair tuple in _resources) + { + if (!tuple.Key.IsAlive) + { + tuple.Value.Dispose(); + _resources.Remove(tuple.Key); + } + } + } + + public BackgroundResource Get() + { + Thread thread = Thread.CurrentThread; + + lock (_resources) + { + BackgroundResource resource; + if (!_resources.TryGetValue(thread, out resource)) + { + Cleanup(); + + resource = new BackgroundResource(_gd, _device); + + _resources[thread] = resource; + } + + return resource; + } + } + + public void Dispose() + { + lock (_resources) + { + foreach (var resource in _resources.Values) + { + resource.Dispose(); + } + } + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/BitMap.cs b/Ryujinx.Graphics.Vulkan/BitMap.cs new file mode 100644 index 000000000..ee3c3c938 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/BitMap.cs @@ -0,0 +1,157 @@ +namespace Ryujinx.Graphics.Vulkan +{ + struct BitMap + { + public const int IntSize = 64; + + private const int IntShift = 6; + private const int IntMask = IntSize - 1; + + private readonly long[] _masks; + + public BitMap(int count) + { + _masks = new long[(count + IntMask) / IntSize]; + } + + public bool AnySet() + { + for 
(int i = 0; i < _masks.Length; i++) + { + if (_masks[i] != 0) + { + return true; + } + } + + return false; + } + + public bool IsSet(int bit) + { + int wordIndex = bit >> IntShift; + int wordBit = bit & IntMask; + + long wordMask = 1L << wordBit; + + return (_masks[wordIndex] & wordMask) != 0; + } + + public bool IsSet(int start, int end) + { + if (start == end) + { + return IsSet(start); + } + + int startIndex = start >> IntShift; + int startBit = start & IntMask; + long startMask = -1L << startBit; + + int endIndex = end >> IntShift; + int endBit = end & IntMask; + long endMask = (long)(ulong.MaxValue >> (IntMask - endBit)); + + if (startIndex == endIndex) + { + return (_masks[startIndex] & startMask & endMask) != 0; + } + + if ((_masks[startIndex] & startMask) != 0) + { + return true; + } + + for (int i = startIndex + 1; i < endIndex; i++) + { + if (_masks[i] != 0) + { + return true; + } + } + + if ((_masks[endIndex] & endMask) != 0) + { + return true; + } + + return false; + } + + public bool Set(int bit) + { + int wordIndex = bit >> IntShift; + int wordBit = bit & IntMask; + + long wordMask = 1L << wordBit; + + if ((_masks[wordIndex] & wordMask) != 0) + { + return false; + } + + _masks[wordIndex] |= wordMask; + + return true; + } + + public void SetRange(int start, int end) + { + if (start == end) + { + Set(start); + return; + } + + int startIndex = start >> IntShift; + int startBit = start & IntMask; + long startMask = -1L << startBit; + + int endIndex = end >> IntShift; + int endBit = end & IntMask; + long endMask = (long)(ulong.MaxValue >> (IntMask - endBit)); + + if (startIndex == endIndex) + { + _masks[startIndex] |= startMask & endMask; + } + else + { + _masks[startIndex] |= startMask; + + for (int i = startIndex + 1; i < endIndex; i++) + { + _masks[i] |= -1; + } + + _masks[endIndex] |= endMask; + } + } + + public void Clear(int bit) + { + int wordIndex = bit >> IntShift; + int wordBit = bit & IntMask; + + long wordMask = 1L << wordBit; + + 
_masks[wordIndex] &= ~wordMask; + } + + public void Clear() + { + for (int i = 0; i < _masks.Length; i++) + { + _masks[i] = 0; + } + } + + public void ClearInt(int start, int end) + { + for (int i = start; i <= end; i++) + { + _masks[i] = 0; + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Vulkan/BufferHolder.cs b/Ryujinx.Graphics.Vulkan/BufferHolder.cs new file mode 100644 index 000000000..6638e4696 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/BufferHolder.cs @@ -0,0 +1,393 @@ +using Ryujinx.Graphics.GAL; +using Silk.NET.Vulkan; +using System; +using System.Runtime.CompilerServices; +using VkBuffer = Silk.NET.Vulkan.Buffer; +using VkFormat = Silk.NET.Vulkan.Format; + +namespace Ryujinx.Graphics.Vulkan +{ + class BufferHolder : IDisposable + { + private const int MaxUpdateBufferSize = 0x10000; + + public const AccessFlags DefaultAccessFlags = + AccessFlags.AccessShaderReadBit | + AccessFlags.AccessShaderWriteBit | + AccessFlags.AccessTransferReadBit | + AccessFlags.AccessTransferWriteBit | + AccessFlags.AccessUniformReadBit | + AccessFlags.AccessShaderReadBit | + AccessFlags.AccessShaderWriteBit; + + private readonly VulkanGraphicsDevice _gd; + private readonly Device _device; + private readonly MemoryAllocation _allocation; + private readonly Auto _buffer; + private readonly Auto _allocationAuto; + private readonly ulong _bufferHandle; + + private CacheByRange _cachedConvertedIndexBuffers; + + public int Size { get; } + + private IntPtr _map; + + private readonly MultiFenceHolder _waitable; + + private bool _lastAccessIsWrite; + + public BufferHolder(VulkanGraphicsDevice gd, Device device, VkBuffer buffer, MemoryAllocation allocation, int size) + { + _gd = gd; + _device = device; + _allocation = allocation; + _allocationAuto = new Auto(allocation); + _waitable = new MultiFenceHolder(size); + _buffer = new Auto(new DisposableBuffer(gd.Api, device, buffer), _waitable, _allocationAuto); + _bufferHandle = buffer.Handle; + Size = size; + _map = 
allocation.HostPointer; + } + + public unsafe Auto CreateView(VkFormat format, int offset, int size) + { + var bufferViewCreateInfo = new BufferViewCreateInfo() + { + SType = StructureType.BufferViewCreateInfo, + Buffer = new VkBuffer(_bufferHandle), + Format = format, + Offset = (uint)offset, + Range = (uint)size + }; + + _gd.Api.CreateBufferView(_device, bufferViewCreateInfo, null, out var bufferView).ThrowOnError(); + + return new Auto(new DisposableBufferView(_gd.Api, _device, bufferView), _waitable, _buffer); + } + + public unsafe void InsertBarrier(CommandBuffer commandBuffer, bool isWrite) + { + // If the last access is write, we always need a barrier to be sure we will read or modify + // the correct data. + // If the last access is read, and current one is a write, we need to wait until the + // read finishes to avoid overwriting data still in use. + // Otherwise, if the last access is a read and the current one too, we don't need barriers. + bool needsBarrier = isWrite || _lastAccessIsWrite; + + _lastAccessIsWrite = isWrite; + + if (needsBarrier) + { + MemoryBarrier memoryBarrier = new MemoryBarrier() + { + SType = StructureType.MemoryBarrier, + SrcAccessMask = DefaultAccessFlags, + DstAccessMask = DefaultAccessFlags + }; + + _gd.Api.CmdPipelineBarrier( + commandBuffer, + PipelineStageFlags.PipelineStageAllCommandsBit, + PipelineStageFlags.PipelineStageAllCommandsBit, + DependencyFlags.DependencyDeviceGroupBit, + 1, + memoryBarrier, + 0, + null, + 0, + null); + } + } + + public Auto GetBuffer() + { + return _buffer; + } + + public Auto GetBuffer(CommandBuffer commandBuffer, bool isWrite = false) + { + if (isWrite) + { + _cachedConvertedIndexBuffers.Clear(); + } + + // InsertBarrier(commandBuffer, isWrite); + return _buffer; + } + + public BufferHandle GetHandle() + { + var handle = _bufferHandle; + return Unsafe.As(ref handle); + } + + public unsafe IntPtr Map(int offset, int mappingSize) + { + return _map; + } + + public unsafe ReadOnlySpan GetData(int 
offset, int size) + { + if (_map != IntPtr.Zero) + { + return GetDataStorage(offset, size); + } + else + { + BackgroundResource resource = _gd.BackgroundResources.Get(); + + if (_gd.CommandBufferPool.OwnedByCurrentThread) + { + _gd.FlushAllCommands(); + + return resource.GetFlushBuffer().GetBufferData(_gd.CommandBufferPool, this, offset, size); + } + else + { + return resource.GetFlushBuffer().GetBufferData(resource.GetPool(), this, offset, size); + } + } + } + + public unsafe Span GetDataStorage(int offset, int size) + { + int mappingSize = Math.Min(size, Size - offset); + + if (_map != IntPtr.Zero) + { + return new Span((void*)(_map + offset), mappingSize); + } + + throw new InvalidOperationException("The buffer is not host mapped."); + } + + public unsafe void SetData(int offset, ReadOnlySpan data, CommandBufferScoped? cbs = null, Action endRenderPass = null) + { + int dataSize = Math.Min(data.Length, Size - offset); + if (dataSize == 0) + { + return; + } + + if (_map != IntPtr.Zero) + { + // If persistently mapped, set the data directly if the buffer is not currently in use. + // bool needsFlush = _gd.CommandBufferPool.HasWaitableOnRentedCommandBuffer(_waitable, offset, dataSize); + bool isRented = _buffer.HasRentedCommandBufferDependency(_gd.CommandBufferPool); + + // If the buffer is rented, take a little more time and check if the use overlaps this handle. + bool needsFlush = isRented && _waitable.IsBufferRangeInUse(offset, dataSize); + + if (!needsFlush) + { + WaitForFences(offset, dataSize); + + data.Slice(0, dataSize).CopyTo(new Span((void*)(_map + offset), dataSize)); + + return; + } + } + + if (cbs != null && !(_buffer.HasCommandBufferDependency(cbs.Value) && _waitable.IsBufferRangeInUse(cbs.Value.CommandBufferIndex, offset, dataSize))) + { + // If the buffer hasn't been used on the command buffer yet, try to preload the data. + // This avoids ending and beginning render passes on each buffer data upload. 
+ + cbs = _gd.PipelineInternal.GetPreloadCommandBuffer(); + endRenderPass = null; + } + + if (cbs == null || + !VulkanConfiguration.UseFastBufferUpdates || + data.Length > MaxUpdateBufferSize || + !TryPushData(cbs.Value, endRenderPass, offset, data)) + { + _gd.BufferManager.StagingBuffer.PushData(_gd.CommandBufferPool, cbs, endRenderPass, this, offset, data); + } + } + + public unsafe void SetDataUnchecked(int offset, ReadOnlySpan data) + { + int dataSize = Math.Min(data.Length, Size - offset); + if (dataSize == 0) + { + return; + } + + if (_map != IntPtr.Zero) + { + data.Slice(0, dataSize).CopyTo(new Span((void*)(_map + offset), dataSize)); + } + else + { + _gd.BufferManager.StagingBuffer.PushData(_gd.CommandBufferPool, null, null, this, offset, data); + } + } + + public void SetDataInline(CommandBufferScoped cbs, Action endRenderPass, int dstOffset, ReadOnlySpan data) + { + if (!TryPushData(cbs, endRenderPass, dstOffset, data)) + { + throw new ArgumentException($"Invalid offset 0x{dstOffset:X} or data size 0x{data.Length:X}."); + } + } + + private unsafe bool TryPushData(CommandBufferScoped cbs, Action endRenderPass, int dstOffset, ReadOnlySpan data) + { + if ((dstOffset & 3) != 0 || (data.Length & 3) != 0) + { + return false; + } + + endRenderPass?.Invoke(); + + var dstBuffer = GetBuffer(cbs.CommandBuffer, true).Get(cbs, dstOffset, data.Length).Value; + + InsertBufferBarrier( + _gd, + cbs.CommandBuffer, + dstBuffer, + BufferHolder.DefaultAccessFlags, + AccessFlags.AccessTransferWriteBit, + PipelineStageFlags.PipelineStageAllCommandsBit, + PipelineStageFlags.PipelineStageTransferBit, + dstOffset, + data.Length); + + fixed (byte* pData = data) + { + for (ulong offset = 0; offset < (ulong)data.Length;) + { + ulong size = Math.Min(MaxUpdateBufferSize, (ulong)data.Length - offset); + _gd.Api.CmdUpdateBuffer(cbs.CommandBuffer, dstBuffer, (ulong)dstOffset + offset, size, pData + offset); + offset += size; + } + } + + InsertBufferBarrier( + _gd, + cbs.CommandBuffer, + 
dstBuffer, + AccessFlags.AccessTransferWriteBit, + BufferHolder.DefaultAccessFlags, + PipelineStageFlags.PipelineStageTransferBit, + PipelineStageFlags.PipelineStageAllCommandsBit, + dstOffset, + data.Length); + + return true; + } + + public static unsafe void Copy( + VulkanGraphicsDevice gd, + CommandBufferScoped cbs, + Auto src, + Auto dst, + int srcOffset, + int dstOffset, + int size) + { + var srcBuffer = src.Get(cbs, srcOffset, size).Value; + var dstBuffer = dst.Get(cbs, dstOffset, size).Value; + + InsertBufferBarrier( + gd, + cbs.CommandBuffer, + dstBuffer, + BufferHolder.DefaultAccessFlags, + AccessFlags.AccessTransferWriteBit, + PipelineStageFlags.PipelineStageAllCommandsBit, + PipelineStageFlags.PipelineStageTransferBit, + dstOffset, + size); + + var region = new BufferCopy((ulong)srcOffset, (ulong)dstOffset, (ulong)size); + + gd.Api.CmdCopyBuffer(cbs.CommandBuffer, srcBuffer, dstBuffer, 1, ®ion); + + InsertBufferBarrier( + gd, + cbs.CommandBuffer, + dstBuffer, + AccessFlags.AccessTransferWriteBit, + BufferHolder.DefaultAccessFlags, + PipelineStageFlags.PipelineStageTransferBit, + PipelineStageFlags.PipelineStageAllCommandsBit, + dstOffset, + size); + } + + public static unsafe void InsertBufferBarrier( + VulkanGraphicsDevice gd, + CommandBuffer commandBuffer, + VkBuffer buffer, + AccessFlags srcAccessMask, + AccessFlags dstAccessMask, + PipelineStageFlags srcStageMask, + PipelineStageFlags dstStageMask, + int offset, + int size) + { + BufferMemoryBarrier memoryBarrier = new BufferMemoryBarrier() + { + SType = StructureType.BufferMemoryBarrier, + SrcAccessMask = srcAccessMask, + DstAccessMask = dstAccessMask, + SrcQueueFamilyIndex = Vk.QueueFamilyIgnored, + DstQueueFamilyIndex = Vk.QueueFamilyIgnored, + Buffer = buffer, + Offset = (ulong)offset, + Size = (ulong)size + }; + + gd.Api.CmdPipelineBarrier( + commandBuffer, + srcStageMask, + dstStageMask, + 0, + 0, + null, + 1, + memoryBarrier, + 0, + null); + } + + public void WaitForFences() + { + 
_waitable.WaitForFences(_gd.Api, _device); + } + + public void WaitForFences(int offset, int size) + { + _waitable.WaitForFences(_gd.Api, _device, offset, size); + } + + public bool MayWait(int offset, int size) + { + return _waitable.MayWait(_gd.Api, _device, offset, size); + } + + public Auto GetBufferI8ToI16(CommandBufferScoped cbs, int offset, int size) + { + if (!_cachedConvertedIndexBuffers.TryGetValue(offset, size, out var holder)) + { + holder = _gd.BufferManager.Create(_gd, (size * 2 + 3) & ~3); + + _gd.HelperShader.ConvertI8ToI16(_gd, cbs, this, holder, offset, size); + + _cachedConvertedIndexBuffers.Add(offset, size, holder); + } + + return holder.GetBuffer(); + } + + public void Dispose() + { + _buffer.Dispose(); + _allocationAuto.Dispose(); + _cachedConvertedIndexBuffers.Dispose(); + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/BufferManager.cs b/Ryujinx.Graphics.Vulkan/BufferManager.cs new file mode 100644 index 000000000..8315ba4a0 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/BufferManager.cs @@ -0,0 +1,213 @@ +using Ryujinx.Graphics.GAL; +using Silk.NET.Vulkan; +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using VkFormat = Silk.NET.Vulkan.Format; + +namespace Ryujinx.Graphics.Vulkan +{ + class BufferManager : IDisposable + { + private const MemoryPropertyFlags DefaultBufferMemoryFlags = + MemoryPropertyFlags.MemoryPropertyHostVisibleBit | + MemoryPropertyFlags.MemoryPropertyHostCoherentBit | + MemoryPropertyFlags.MemoryPropertyHostCachedBit; + + private const MemoryPropertyFlags DeviceLocalBufferMemoryFlags = + MemoryPropertyFlags.MemoryPropertyDeviceLocalBit; + + private const MemoryPropertyFlags FlushableDeviceLocalBufferMemoryFlags = + MemoryPropertyFlags.MemoryPropertyHostVisibleBit | + MemoryPropertyFlags.MemoryPropertyHostCoherentBit | + MemoryPropertyFlags.MemoryPropertyDeviceLocalBit; + + private const BufferUsageFlags DefaultBufferUsageFlags = + 
BufferUsageFlags.BufferUsageTransferSrcBit | + BufferUsageFlags.BufferUsageTransferDstBit | + BufferUsageFlags.BufferUsageUniformTexelBufferBit | + BufferUsageFlags.BufferUsageStorageTexelBufferBit | + BufferUsageFlags.BufferUsageUniformBufferBit | + BufferUsageFlags.BufferUsageStorageBufferBit | + BufferUsageFlags.BufferUsageIndexBufferBit | + BufferUsageFlags.BufferUsageVertexBufferBit | + BufferUsageFlags.BufferUsageTransformFeedbackBufferBitExt; + + private readonly PhysicalDevice _physicalDevice; + private readonly Device _device; + + private readonly List _buffers; + + public StagingBuffer StagingBuffer { get; } + + public BufferManager(VulkanGraphicsDevice gd, PhysicalDevice physicalDevice, Device device) + { + _physicalDevice = physicalDevice; + _device = device; + _buffers = new List(); + StagingBuffer = new StagingBuffer(gd, this); + } + + public BufferHandle CreateWithHandle(VulkanGraphicsDevice gd, int size, bool deviceLocal) + { + var holder = Create(gd, size, deviceLocal: deviceLocal); + if (holder == null) + { + return BufferHandle.Null; + } + + ulong handle64 = (ulong)_buffers.Count + 1; + + var handle = Unsafe.As(ref handle64); + + _buffers.Add(holder); + + return handle; + } + + public unsafe BufferHolder Create(VulkanGraphicsDevice gd, int size, bool forConditionalRendering = false, bool deviceLocal = false) + { + var usage = DefaultBufferUsageFlags; + + if (forConditionalRendering && gd.Capabilities.SupportsConditionalRendering) + { + usage |= BufferUsageFlags.BufferUsageConditionalRenderingBitExt; + } + else if (gd.SupportsIndirectParameters) + { + usage |= BufferUsageFlags.BufferUsageIndirectBufferBit; + } + + var bufferCreateInfo = new BufferCreateInfo() + { + SType = StructureType.BufferCreateInfo, + Size = (ulong)size, + Usage = usage, + SharingMode = SharingMode.Exclusive + }; + + gd.Api.CreateBuffer(_device, in bufferCreateInfo, null, out var buffer).ThrowOnError(); + gd.Api.GetBufferMemoryRequirements(_device, buffer, out var 
requirements); + + var allocateFlags = deviceLocal ? DeviceLocalBufferMemoryFlags : DefaultBufferMemoryFlags; + + var allocation = gd.MemoryAllocator.AllocateDeviceMemory(_physicalDevice, requirements, allocateFlags); + + if (allocation.Memory.Handle == 0UL) + { + gd.Api.DestroyBuffer(_device, buffer, null); + return null; + } + + gd.Api.BindBufferMemory(_device, buffer, allocation.Memory, allocation.Offset); + + return new BufferHolder(gd, _device, buffer, allocation, size); + } + + public Auto CreateView(BufferHandle handle, VkFormat format, int offset, int size) + { + if (TryGetBuffer(handle, out var holder)) + { + return holder.CreateView(format, offset, size); + } + + return null; + } + + public Auto GetBuffer(CommandBuffer commandBuffer, BufferHandle handle, bool isWrite) + { + if (TryGetBuffer(handle, out var holder)) + { + return holder.GetBuffer(commandBuffer, isWrite); + } + + return null; + } + + public Auto GetBufferI8ToI16(CommandBufferScoped cbs, BufferHandle handle, int offset, int size) + { + if (TryGetBuffer(handle, out var holder)) + { + return holder.GetBufferI8ToI16(cbs, offset, size); + } + + return null; + } + + public Auto GetBuffer(CommandBuffer commandBuffer, BufferHandle handle, bool isWrite, out int size) + { + if (TryGetBuffer(handle, out var holder)) + { + size = holder.Size; + return holder.GetBuffer(commandBuffer, isWrite); + } + + size = 0; + return null; + } + + public ReadOnlySpan GetData(BufferHandle handle, int offset, int size) + { + if (TryGetBuffer(handle, out var holder)) + { + return holder.GetData(offset, size); + } + + return ReadOnlySpan.Empty; + } + + public void SetData(BufferHandle handle, int offset, ReadOnlySpan data) where T : unmanaged + { + SetData(handle, offset, MemoryMarshal.Cast(data), null, null); + } + + public void SetData(BufferHandle handle, int offset, ReadOnlySpan data, CommandBufferScoped? 
cbs, Action endRenderPass) + { + if (TryGetBuffer(handle, out var holder)) + { + holder.SetData(offset, data, cbs, endRenderPass); + } + } + + public void Delete(BufferHandle handle) + { + if (TryGetBuffer(handle, out var holder)) + { + holder.Dispose(); + // _buffers.Remove(handle); + } + } + + private bool TryGetBuffer(BufferHandle handle, out BufferHolder holder) + { + int index = (int)Unsafe.As(ref handle) - 1; + if ((uint)index < _buffers.Count) + { + holder = _buffers[index]; + return true; + } + + holder = default; + return false; + } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + for (int i = 0; i < _buffers.Count; i++) + { + _buffers[i].Dispose(); + } + + StagingBuffer.Dispose(); + } + } + + public void Dispose() + { + Dispose(true); + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/BufferRangeList.cs b/Ryujinx.Graphics.Vulkan/BufferRangeList.cs new file mode 100644 index 000000000..9cf249fea --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/BufferRangeList.cs @@ -0,0 +1,147 @@ +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Vulkan +{ + struct BufferRangeList + { + private struct Range + { + public int Offset { get; } + public int Size { get; } + + public Range(int offset, int size) + { + Offset = offset; + Size = size; + } + + public bool OverlapsWith(int offset, int size) + { + return Offset < offset + size && offset < Offset + Size; + } + } + + private List[] _ranges; + + public void Initialize() + { + _ranges = new List[CommandBufferPool.MaxCommandBuffers]; + } + + public void Add(int cbIndex, int offset, int size) + { + var list = _ranges[cbIndex]; + if (list != null) + { + int overlapIndex = BinarySearch(list, offset, size); + if (overlapIndex >= 0) + { + while (overlapIndex > 0 && list[overlapIndex - 1].OverlapsWith(offset, size)) + { + overlapIndex--; + } + + int endOffset = offset + size; + int startIndex = overlapIndex; + + while (overlapIndex < list.Count && list[overlapIndex].OverlapsWith(offset, size)) + 
{ + var currentOverlap = list[overlapIndex]; + var currentOverlapEndOffset = currentOverlap.Offset + currentOverlap.Size; + + if (offset > currentOverlap.Offset) + { + offset = currentOverlap.Offset; + } + + if (endOffset < currentOverlapEndOffset) + { + endOffset = currentOverlapEndOffset; + } + + overlapIndex++; + } + + int count = overlapIndex - startIndex; + + list.RemoveRange(startIndex, count); + + size = endOffset - offset; + overlapIndex = startIndex; + } + else + { + overlapIndex = ~overlapIndex; + } + + list.Insert(overlapIndex, new Range(offset, size)); + + int last = 0; + foreach (var rg in list) + { + if (rg.Offset < last) + { + throw new System.Exception("list not properly sorted"); + } + last = rg.Offset; + } + } + else + { + list = new List + { + new Range(offset, size) + }; + + _ranges[cbIndex] = list; + } + } + + public bool OverlapsWith(int cbIndex, int offset, int size) + { + var list = _ranges[cbIndex]; + if (list == null) + { + return false; + } + + return BinarySearch(list, offset, size) >= 0; + } + + private static int BinarySearch(List list, int offset, int size) + { + int left = 0; + int right = list.Count - 1; + + while (left <= right) + { + int range = right - left; + + int middle = left + (range >> 1); + + var item = list[middle]; + + if (item.OverlapsWith(offset, size)) + { + return middle; + } + + if (offset < item.Offset) + { + right = middle - 1; + } + else + { + left = middle + 1; + } + } + + return ~left; + } + + public void Clear(int cbIndex) + { + _ranges[cbIndex] = null; + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/BufferState.cs b/Ryujinx.Graphics.Vulkan/BufferState.cs new file mode 100644 index 000000000..ccf2aca0e --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/BufferState.cs @@ -0,0 +1,83 @@ +using Silk.NET.Vulkan; +using System; + +namespace Ryujinx.Graphics.Vulkan +{ + struct BufferState : IDisposable + { + public static BufferState Null => new BufferState(null, 0, 0); + + private readonly Auto _buffer; + private readonly 
/// <summary>
/// Binds the held buffer range as a vertex buffer at the given binding.
/// Nothing is recorded when no buffer is held.
/// </summary>
/// <param name="gd">Graphics device providing the Vulkan API entry points and capabilities</param>
/// <param name="cbs">Command buffer the bind command is recorded on</param>
/// <param name="binding">Vertex buffer binding index</param>
public void BindVertexBuffer(VulkanGraphicsDevice gd, CommandBufferScoped cbs, uint binding)
{
    if (_buffer == null)
    {
        return;
    }

    var vertexBuffer = _buffer.Get(cbs, _offset, _size).Value;

    if (!gd.Capabilities.SupportsExtendedDynamicState)
    {
        // Basic path: only the offset is supplied at bind time.
        gd.Api.CmdBindVertexBuffers(cbs.CommandBuffer, binding, 1, vertexBuffer, (ulong)_offset);

        return;
    }

    // Extended dynamic state path: size and stride are supplied together with the binding.
    gd.ExtendedDynamicStateApi.CmdBindVertexBuffers2(
        cbs.CommandBuffer,
        binding,
        1,
        vertexBuffer,
        (ulong)_offset,
        (ulong)_size,
        _stride);
}
/// <summary>
/// Marks a byte range of the buffer as used by the given command buffer.
/// </summary>
/// <param name="cbIndex">Index of the command buffer using the range</param>
/// <param name="offset">Start offset of the used range, in bytes</param>
/// <param name="size">Size of the used range, in bytes</param>
public void Add(int cbIndex, int offset, int size)
{
    // Some usages can be out of bounds (vertex buffer on amd), so bound if necessary.
    if (offset + size > _size)
    {
        size = _size - offset;
    }

    // An empty range (or one that starts at/after the end of the buffer once clamped) uses no bytes.
    // Without this guard, "(offset + size - 1) / _granularity" would either mark a bit that is not
    // actually in use or produce an end index that lies before the start index.
    if (size <= 0)
    {
        return;
    }

    int cbBase = cbIndex * _bitsPerCb;
    int start = cbBase + offset / _granularity;
    int end = cbBase + (offset + size - 1) / _granularity;

    _bitmap.SetRange(start, end);
}
/// <summary>
/// Combines an offset and a size into a single 64-bit dictionary key.
/// </summary>
/// <param name="offset">Range offset, stored in the low 32 bits</param>
/// <param name="size">Range size, stored starting at bit 32</param>
/// <returns>The packed key</returns>
private static ulong PackRange(int offset, int size)
{
    ulong lowBits = (uint)offset;
    ulong highBits = (ulong)size << 32;

    return lowBits | highBits;
}
api.AllocateCommandBuffers(device, allocateInfo, out CommandBuffer); + + Dependants = new List(); + Waitables = new HashSet(); + Dependencies = new HashSet(); + } + } + + private readonly ReservedCommandBuffer[] _commandBuffers; + + private readonly int[] _queuedIndexes; + private int _queuedIndexesPtr; + private int _queuedCount; + private int _inUseCount; + + public unsafe CommandBufferPool(Vk api, Device device, Queue queue, object queueLock, uint queueFamilyIndex, bool isLight = false) + { + _api = api; + _device = device; + _queue = queue; + _queueLock = queueLock; + _owner = Thread.CurrentThread; + + var commandPoolCreateInfo = new CommandPoolCreateInfo() + { + SType = StructureType.CommandPoolCreateInfo, + QueueFamilyIndex = queueFamilyIndex, + Flags = CommandPoolCreateFlags.CommandPoolCreateTransientBit | + CommandPoolCreateFlags.CommandPoolCreateResetCommandBufferBit + }; + + api.CreateCommandPool(device, commandPoolCreateInfo, null, out _pool).ThrowOnError(); + + // We need at least 2 command buffers to get texture data in some cases. + _totalCommandBuffers = isLight ? 
/// <summary>
/// Checks whether any command buffer that is currently rented (in use) tracks the given waitable
/// and reports the given buffer range as in use.
/// </summary>
/// <param name="waitable">Fence holder to look for</param>
/// <param name="offset">Start offset of the buffer range</param>
/// <param name="size">Size of the buffer range</param>
/// <returns>True if such a command buffer exists, false otherwise</returns>
public bool HasWaitableOnRentedCommandBuffer(MultiFenceHolder waitable, int offset, int size)
{
    lock (_commandBuffers)
    {
        for (int cbIndex = 0; cbIndex < _totalCommandBuffers; cbIndex++)
        {
            ref var entry = ref _commandBuffers[cbIndex];

            // Only rented command buffers that registered this waitable are of interest.
            if (!entry.InUse || !entry.Waitables.Contains(waitable))
            {
                continue;
            }

            if (waitable.IsBufferRangeInUse(cbIndex, offset, size))
            {
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Rents a free command buffer from the pool and begins recording on it.
/// </summary>
/// <remarks>
/// Command buffers that finished execution are reclaimed first; when every command buffer is either
/// rented or queued, this waits for the oldest queued one.
/// </remarks>
/// <returns>A scoped handle to the rented command buffer</returns>
/// <exception cref="InvalidOperationException">Thrown when no command buffer is available</exception>
public CommandBufferScoped Rent()
{
    lock (_commandBuffers)
    {
        // Force a wait only when the pool is completely exhausted.
        int cursor = FreeConsumed(_inUseCount + _queuedCount == _totalCommandBuffers);

        for (int i = 0; i < _totalCommandBuffers; i++)
        {
            ref var entry = ref _commandBuffers[cursor];

            if (!entry.InUse && !entry.InConsumption)
            {
                var commandBufferBeginInfo = new CommandBufferBeginInfo()
                {
                    SType = StructureType.CommandBufferBeginInfo
                };

                // Do not silently continue with a command buffer that failed to enter the recording state.
                _api.BeginCommandBuffer(entry.CommandBuffer, commandBufferBeginInfo).ThrowOnError();

                // Only mark the entry as rented once recording has actually started,
                // so a failure above leaves the pool bookkeeping untouched.
                entry.InUse = true;

                _inUseCount++;

                return new CommandBufferScoped(this, entry.CommandBuffer, cursor);
            }

            // The pool size is a power of two, so masking wraps the cursor around.
            cursor = (cursor + 1) & _totalCommandBuffersMask;
        }
    }

    throw new InvalidOperationException($"Out of command buffers (In use: {_inUseCount}, queued: {_queuedCount}, total: {_totalCommandBuffers})");
}
/// <summary>
/// Waits for the command buffer at the given index to finish (if it was submitted) and releases
/// everything that was attached to it: dependants, waitables, semaphore dependencies and its fence.
/// </summary>
/// <param name="cbIndex">Index of the command buffer entry to release</param>
/// <param name="refreshFence">True to create a new fence for the entry afterwards, false to leave it without a fence</param>
private void WaitAndDecrementRef(int cbIndex, bool refreshFence = true)
{
    ref var entry = ref _commandBuffers[cbIndex];

    // Only a submitted command buffer has something to wait for.
    if (entry.InConsumption)
    {
        entry.Fence.Wait();
        entry.InConsumption = false;
    }

    // Drop the references taken by AddDependant.
    foreach (var dependant in entry.Dependants)
    {
        dependant.DecrementReferenceCount(cbIndex);
    }

    // Detach this command buffer's fence and buffer usage from every waitable that tracked it.
    // This must happen before the fence is disposed below.
    foreach (var waitable in entry.Waitables)
    {
        waitable.RemoveFence(cbIndex, entry.Fence);
        waitable.RemoveBufferUses(cbIndex);
    }

    // Balance the Get() performed on each semaphore holder by AddDependency.
    foreach (var dependency in entry.Dependencies)
    {
        dependency.Put();
    }

    entry.Dependants.Clear();
    entry.Waitables.Clear();
    entry.Dependencies.Clear();

    // The fence can be null here: the constructor calls this method right after Initialize,
    // which does not create a fence.
    entry.Fence?.Dispose();

    if (refreshFence)
    {
        entry.Fence = new FenceHolder(_api, _device);
    }
    else
    {
        entry.Fence = null;
    }
}
/// <summary>
/// A command buffer rented from a <see cref="CommandBufferPool"/>, together with its pool index.
/// Disposing it returns the command buffer to the pool it came from.
/// </summary>
struct CommandBufferScoped : IDisposable
{
    private readonly CommandBufferPool _pool;

    /// <summary>Underlying Vulkan command buffer.</summary>
    public CommandBuffer CommandBuffer { get; }

    /// <summary>Index of the command buffer inside its pool.</summary>
    public int CommandBufferIndex { get; }

    public CommandBufferScoped(CommandBufferPool pool, CommandBuffer commandBuffer, int commandBufferIndex)
    {
        _pool = pool;
        CommandBuffer = commandBuffer;
        CommandBufferIndex = commandBufferIndex;
    }

    /// <summary>Registers a dependant on this command buffer through the pool.</summary>
    public void AddDependant(IAuto dependant) => _pool.AddDependant(CommandBufferIndex, dependant);

    /// <summary>Registers a waitable on this command buffer through the pool.</summary>
    public void AddWaitable(MultiFenceHolder waitable) => _pool.AddWaitable(CommandBufferIndex, waitable);

    /// <summary>Makes this command buffer depend on another command buffer through the pool.</summary>
    public void AddDependency(CommandBufferScoped dependencyCbs) => _pool.AddDependency(CommandBufferIndex, dependencyCbs);

    /// <summary>Gets the fence the pool currently associates with this command buffer.</summary>
    public FenceHolder GetFence() => _pool.GetFence(CommandBufferIndex);

    /// <summary>Returns the command buffer to its pool; does nothing for a default (pool-less) instance.</summary>
    public void Dispose() => _pool?.Return(this);
}
/// <summary>
/// Writes a single buffer descriptor into one binding of a descriptor set.
/// The write is skipped when the buffer handle is null (zero).
/// </summary>
/// <param name="setIndex">Index of the descriptor set inside this collection</param>
/// <param name="bindingIndex">Destination binding</param>
/// <param name="bufferInfo">Buffer descriptor to write</param>
/// <param name="type">Descriptor type of the binding</param>
public unsafe void UpdateBuffer(int setIndex, int bindingIndex, DescriptorBufferInfo bufferInfo, DescriptorType type)
{
    if (bufferInfo.Buffer.Handle == 0UL)
    {
        return;
    }

    var write = new WriteDescriptorSet
    {
        SType = StructureType.WriteDescriptorSet,
        DstSet = _descriptorSets[setIndex],
        DstBinding = (uint)bindingIndex,
        DescriptorType = type,
        DescriptorCount = 1,
        PBufferInfo = &bufferInfo
    };

    _holder.Api.UpdateDescriptorSets(_holder.Device, 1, write, 0, null);
}
/// <summary>
/// Writes a single image descriptor into one binding of a descriptor set.
/// The write is skipped when the image view handle is null (zero).
/// </summary>
/// <param name="setIndex">Index of the descriptor set inside this collection</param>
/// <param name="bindingIndex">Destination binding</param>
/// <param name="imageInfo">Image descriptor to write</param>
/// <param name="type">Descriptor type of the binding</param>
public unsafe void UpdateImage(int setIndex, int bindingIndex, DescriptorImageInfo imageInfo, DescriptorType type)
{
    if (imageInfo.ImageView.Handle == 0UL)
    {
        return;
    }

    var write = new WriteDescriptorSet
    {
        SType = StructureType.WriteDescriptorSet,
        DstSet = _descriptorSets[setIndex],
        DstBinding = (uint)bindingIndex,
        DescriptorType = type,
        DescriptorCount = 1,
        PImageInfo = &imageInfo
    };

    _holder.Api.UpdateDescriptorSets(_holder.Device, 1, write, 0, null);
}
/// <summary>
/// Writes combined image/sampler descriptors starting at <paramref name="baseBinding"/>,
/// skipping entries whose image view or sampler handle is null (zero).
/// </summary>
/// <remarks>
/// Consecutive non-null entries are grouped into a single descriptor write.
/// The descriptor type written is always <see cref="DescriptorType.CombinedImageSampler"/>;
/// the <paramref name="type"/> argument is not used.
/// </remarks>
/// <param name="setIndex">Index of the descriptor set inside this collection</param>
/// <param name="baseBinding">Binding that corresponds to element 0 of <paramref name="imageInfo"/></param>
/// <param name="imageInfo">Image descriptors, one per consecutive binding</param>
/// <param name="type">Unused, kept for signature parity with the other update methods</param>
public unsafe void UpdateImagesCombined(int setIndex, int baseBinding, ReadOnlySpan<DescriptorImageInfo> imageInfo, DescriptorType type)
{
    if (imageInfo.Length == 0)
    {
        return;
    }

    fixed (DescriptorImageInfo* pImageInfo = imageInfo)
    {
        for (int i = 0; i < imageInfo.Length; i++)
        {
            bool nonNull = imageInfo[i].ImageView.Handle != 0 && imageInfo[i].Sampler.Handle != 0;
            if (nonNull)
            {
                int count = 1;

                // Extend the run while the following entries are also fully populated.
                while (i + count < imageInfo.Length &&
                    imageInfo[i + count].ImageView.Handle != 0 &&
                    imageInfo[i + count].Sampler.Handle != 0)
                {
                    count++;
                }

                var writeDescriptorSet = new WriteDescriptorSet
                {
                    SType = StructureType.WriteDescriptorSet,
                    DstSet = _descriptorSets[setIndex],
                    DstBinding = (uint)(baseBinding + i),
                    DescriptorType = DescriptorType.CombinedImageSampler,
                    DescriptorCount = (uint)count,
                    // The run starts at element i, so the source pointer must be advanced as well;
                    // pointing at element 0 would write the wrong descriptors into binding baseBinding + i.
                    PImageInfo = pImageInfo + i
                };

                _holder.Api.UpdateDescriptorSets(_holder.Device, 1, writeDescriptorSet, 0, null);

                i += count - 1;
            }
        }
    }
}
/// <summary>
/// Creates a descriptor pool sized for every descriptor type used by the backend.
/// </summary>
/// <param name="api">Vulkan API entry points</param>
/// <param name="device">Device the pool is created on</param>
public unsafe DescriptorPoolHolder(Vk api, Device device)
{
    Api = api;
    Device = device;

    // Per-type descriptor budgets, each scaled by the pool multiplier.
    DescriptorPoolSize[] sizes =
    {
        new DescriptorPoolSize(DescriptorType.UniformBuffer, (1 + Constants.MaxUniformBufferBindings) * DescriptorPoolMultiplier),
        new DescriptorPoolSize(DescriptorType.StorageBuffer, Constants.MaxStorageBufferBindings * DescriptorPoolMultiplier),
        new DescriptorPoolSize(DescriptorType.CombinedImageSampler, Constants.MaxTextureBindings * DescriptorPoolMultiplier),
        new DescriptorPoolSize(DescriptorType.StorageImage, Constants.MaxImageBindings * DescriptorPoolMultiplier),
        new DescriptorPoolSize(DescriptorType.UniformTexelBuffer, Constants.MaxTextureBindings * DescriptorPoolMultiplier),
        new DescriptorPoolSize(DescriptorType.StorageTexelBuffer, Constants.MaxImageBindings * DescriptorPoolMultiplier)
    };

    // The number of sets the pool can hold is also what CanFit checks allocations against.
    _capacity = (uint)sizes.Length * DescriptorPoolMultiplier;

    fixed (DescriptorPoolSize* pSizes = sizes)
    {
        var createInfo = new DescriptorPoolCreateInfo()
        {
            SType = StructureType.DescriptorPoolCreateInfo,
            MaxSets = _capacity,
            PoolSizeCount = (uint)sizes.Length,
            PPoolSizes = pSizes
        };

        Api.CreateDescriptorPool(device, createInfo, null, out _pool).ThrowOnError();
    }
}
/// <summary>
/// Checks whether the pool still has room for the requested number of descriptor sets.
/// When it does not, the pool is marked as done so that it is destroyed once all of its sets are freed.
/// </summary>
/// <param name="count">Number of descriptor sets about to be allocated</param>
/// <returns>True if the sets fit in this pool, false if a new pool is required</returns>
public bool CanFit(int count)
{
    if (_totalSets + count <= _capacity)
    {
        return true;
    }

    // A failed TryAllocateDescriptorSets already marks the pool as done (and may destroy it right away
    // when no sets are in use). The manager then asks the same pool again through CanFit, so the
    // destruction attempt must not be repeated or the pool handle would be destroyed twice.
    if (!_done)
    {
        _done = true;
        DestroyIfDone();
    }

    return false;
}
/// <summary>
/// Gets a pool able to hold the requested number of descriptor sets,
/// replacing the current pool with a new one when there is none or it cannot fit them.
/// </summary>
/// <param name="api">Vulkan API entry points, used when a new pool has to be created</param>
/// <param name="requiredCount">Number of descriptor sets that will be allocated</param>
/// <returns>The pool to allocate from</returns>
private DescriptorPoolHolder GetPool(Vk api, int requiredCount)
{
    // CanFit is only consulted when a pool exists.
    bool reusable = _currentPool != null && _currentPool.CanFit(requiredCount);

    if (!reusable)
    {
        _currentPool = new DescriptorPoolHolder(api, _device);
    }

    return _currentPool;
}
Texture = 1 << 2, + Image = 1 << 3, + All = Uniform | Storage | Texture | Image + } + + private DirtyFlags _dirty; + + private readonly BufferHolder _dummyBuffer; + private readonly TextureView _dummyTexture; + private readonly SamplerHolder _dummySampler; + + public DescriptorSetUpdater(VulkanGraphicsDevice gd, PipelineBase pipeline) + { + _gd = gd; + _pipeline = pipeline; + + // Some of the bindings counts needs to be multiplied by 2 because we have buffer and + // regular textures/images interleaved on the same descriptor set. + + _uniformBufferRefs = new Auto[Constants.MaxUniformBufferBindings]; + _storageBufferRefs = new Auto[Constants.MaxStorageBufferBindings]; + _textureRefs = new Auto[Constants.MaxTextureBindings * 2]; + _samplerRefs = new Auto[Constants.MaxTextureBindings * 2]; + _imageRefs = new Auto[Constants.MaxImageBindings * 2]; + _bufferTextureRefs = new TextureBuffer[Constants.MaxTextureBindings * 2]; + _bufferImageRefs = new TextureBuffer[Constants.MaxImageBindings * 2]; + _bufferImageFormats = new GAL.Format[Constants.MaxImageBindings * 2]; + + _uniformBuffers = new DescriptorBufferInfo[Constants.MaxUniformBufferBindings]; + _storageBuffers = new DescriptorBufferInfo[Constants.MaxStorageBufferBindings]; + _textures = new DescriptorImageInfo[Constants.MaxTexturesPerStage]; + _images = new DescriptorImageInfo[Constants.MaxImagesPerStage]; + _bufferTextures = new BufferView[Constants.MaxTexturesPerStage]; + _bufferImages = new BufferView[Constants.MaxImagesPerStage]; + + var initialImageInfo = new DescriptorImageInfo() + { + ImageLayout = ImageLayout.General + }; + + _textures.AsSpan().Fill(initialImageInfo); + _images.AsSpan().Fill(initialImageInfo); + + _uniformSet = new bool[Constants.MaxUniformBufferBindings]; + _storageSet = new bool[Constants.MaxStorageBufferBindings]; + + if (gd.Capabilities.SupportsNullDescriptors) + { + // If null descriptors are supported, we can pass null as the handle. 
/// <summary>
/// Records the image to use for an image binding and flags image descriptors as dirty.
/// A null image leaves the binding and the dirty state untouched.
/// </summary>
/// <param name="binding">Image binding index</param>
/// <param name="image">Buffer texture or texture view to bind</param>
/// <param name="imageFormat">Format the image is accessed with</param>
public void SetImage(int binding, ITexture image, GAL.Format imageFormat)
{
    switch (image)
    {
        case null:
            return;

        case TextureBuffer imageBuffer:
            // Buffer images keep the format alongside the reference.
            _bufferImageRefs[binding] = imageBuffer;
            _bufferImageFormats[binding] = imageFormat;
            break;

        case TextureView view:
            _imageRefs[binding] = view.GetView(imageFormat).GetIdentityImageView();
            break;
    }

    SignalDirty(DirtyFlags.Image);
}
currentInfo.Range != info.Range) + { + _storageSet[index] = false; + + currentInfo = info; + currentVkBuffer = vkBuffer; + } + } + + SignalDirty(DirtyFlags.Storage); + } + + public void SetTextureAndSampler(CommandBufferScoped cbs, ShaderStage stage, int binding, ITexture texture, ISampler sampler) + { + if (texture == null) + { + return; + } + + if (texture is TextureBuffer textureBuffer) + { + _bufferTextureRefs[binding] = textureBuffer; + } + else + { + TextureView view = (TextureView)texture; + + view.Storage.InsertBarrier(cbs, AccessFlags.AccessShaderReadBit, stage.ConvertToPipelineStageFlags()); + + _textureRefs[binding] = view.GetImageView(); + _samplerRefs[binding] = ((SamplerHolder)sampler)?.GetSampler(); + } + + SignalDirty(DirtyFlags.Texture); + } + + public void SetUniformBuffers(CommandBuffer commandBuffer, int first, ReadOnlySpan buffers) + { + for (int i = 0; i < buffers.Length; i++) + { + var buffer = buffers[i]; + int index = first + i; + + Auto vkBuffer = _gd.BufferManager.GetBuffer(commandBuffer, buffer.Handle, false); + ref Auto currentVkBuffer = ref _uniformBufferRefs[index]; + + DescriptorBufferInfo info = new DescriptorBufferInfo() + { + Offset = (ulong)buffer.Offset, + Range = (ulong)buffer.Size + }; + ref DescriptorBufferInfo currentInfo = ref _uniformBuffers[index]; + + if (vkBuffer != currentVkBuffer || currentInfo.Offset != info.Offset || currentInfo.Range != info.Range) + { + _uniformSet[index] = false; + + currentInfo = info; + currentVkBuffer = vkBuffer; + } + } + + SignalDirty(DirtyFlags.Uniform); + } + + private void SignalDirty(DirtyFlags flag) + { + _dirty |= flag; + } + + public void UpdateAndBindDescriptorSets(CommandBufferScoped cbs, PipelineBindPoint pbp) + { + if ((_dirty & DirtyFlags.All) == 0) + { + return; + } + + // System.Console.WriteLine("modified " + _dirty + " " + _modified + " on program " + _program.GetHashCode().ToString("X")); + + if (_dirty.HasFlag(DirtyFlags.Uniform)) + { + if (_program.UsePushDescriptors) + { 
+ UpdateAndBindUniformBufferPd(cbs, pbp); + } + else + { + UpdateAndBind(cbs, PipelineBase.UniformSetIndex, pbp); + } + } + + if (_dirty.HasFlag(DirtyFlags.Storage)) + { + UpdateAndBind(cbs, PipelineBase.StorageSetIndex, pbp); + } + + if (_dirty.HasFlag(DirtyFlags.Texture)) + { + UpdateAndBind(cbs, PipelineBase.TextureSetIndex, pbp); + } + + if (_dirty.HasFlag(DirtyFlags.Image)) + { + UpdateAndBind(cbs, PipelineBase.ImageSetIndex, pbp); + } + + _dirty = DirtyFlags.None; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void UpdateBuffer( + CommandBufferScoped cbs, + ref DescriptorBufferInfo info, + Auto buffer, + Auto dummyBuffer) + { + info.Buffer = buffer?.Get(cbs, (int)info.Offset, (int)info.Range).Value ?? default; + + // The spec requires that buffers with null handle have offset as 0 and range as VK_WHOLE_SIZE. + if (info.Buffer.Handle == 0) + { + info.Buffer = dummyBuffer?.Get(cbs).Value ?? default; + info.Offset = 0; + info.Range = Vk.WholeSize; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void UpdateAndBind(CommandBufferScoped cbs, int setIndex, PipelineBindPoint pbp) + { + var program = _program; + int stagesCount = program.Bindings[setIndex].Length; + if (stagesCount == 0 && setIndex != PipelineBase.UniformSetIndex) + { + return; + } + + var dummyBuffer = _dummyBuffer?.GetBuffer(); + + var dsc = program.GetNewDescriptorSetCollection(_gd, cbs.CommandBufferIndex, setIndex, out var isNew).Get(cbs); + + if (!program.HasMinimalLayout) + { + if (isNew) + { + Initialize(cbs, setIndex, dsc); + } + + if (setIndex == PipelineBase.UniformSetIndex) + { + Span uniformBuffer = stackalloc DescriptorBufferInfo[1]; + + if (!_uniformSet[0]) + { + _cachedSupportBuffer = _gd.BufferManager.GetBuffer(cbs.CommandBuffer, _pipeline.SupportBufferUpdater.Handle, false).Get(cbs, 0, SupportBuffer.RequiredSize).Value; + _uniformSet[0] = true; + } + + uniformBuffer[0] = new DescriptorBufferInfo() + { + Offset = 0, + Range = 
(ulong)SupportBuffer.RequiredSize, + Buffer = _cachedSupportBuffer + }; + + dsc.UpdateBuffers(0, 0, uniformBuffer, DescriptorType.UniformBuffer); + } + } + + for (int stageIndex = 0; stageIndex < stagesCount; stageIndex++) + { + var stageBindings = program.Bindings[setIndex][stageIndex]; + int bindingsCount = stageBindings.Length; + int count; + + for (int bindingIndex = 0; bindingIndex < bindingsCount; bindingIndex += count) + { + int binding = stageBindings[bindingIndex]; + count = 1; + + while (bindingIndex + count < bindingsCount && stageBindings[bindingIndex + count] == binding + count) + { + count++; + } + + if (setIndex == PipelineBase.UniformSetIndex) + { + for (int i = 0; i < count; i++) + { + int index = binding + i; + + if (!_uniformSet[index]) + { + UpdateBuffer(cbs, ref _uniformBuffers[index], _uniformBufferRefs[index], dummyBuffer); + + _uniformSet[index] = true; + } + } + + ReadOnlySpan uniformBuffers = _uniformBuffers; + dsc.UpdateBuffers(0, binding, uniformBuffers.Slice(binding, count), DescriptorType.UniformBuffer); + } + else if (setIndex == PipelineBase.StorageSetIndex) + { + for (int i = 0; i < count; i++) + { + int index = binding + i; + + if (!_storageSet[index]) + { + UpdateBuffer(cbs, ref _storageBuffers[index], _storageBufferRefs[index], dummyBuffer); + + _storageSet[index] = true; + } + } + + ReadOnlySpan storageBuffers = _storageBuffers; + dsc.UpdateStorageBuffers(0, binding, storageBuffers.Slice(binding, count)); + } + else if (setIndex == PipelineBase.TextureSetIndex) + { + if (((uint)binding % (Constants.MaxTexturesPerStage * 2)) < Constants.MaxTexturesPerStage || program.HasMinimalLayout) + { + Span textures = _textures; + + for (int i = 0; i < count; i++) + { + ref var texture = ref textures[i]; + + texture.ImageView = _textureRefs[binding + i]?.Get(cbs).Value ?? default; + texture.Sampler = _samplerRefs[binding + i]?.Get(cbs).Value ?? 
default; + + if (texture.ImageView.Handle == 0) + { + texture.ImageView = _dummyTexture.GetImageView().Get(cbs).Value; + } + + if (texture.Sampler.Handle == 0) + { + texture.Sampler = _dummySampler.GetSampler().Get(cbs).Value; + } + } + + dsc.UpdateImages(0, binding, textures.Slice(0, count), DescriptorType.CombinedImageSampler); + } + else + { + Span bufferTextures = _bufferTextures; + + for (int i = 0; i < count; i++) + { + bufferTextures[i] = _bufferTextureRefs[binding + i]?.GetBufferView(cbs) ?? default; + } + + dsc.UpdateBufferImages(0, binding, bufferTextures.Slice(0, count), DescriptorType.UniformTexelBuffer); + } + } + else if (setIndex == PipelineBase.ImageSetIndex) + { + if (((uint)binding % (Constants.MaxImagesPerStage * 2)) < Constants.MaxImagesPerStage || program.HasMinimalLayout) + { + Span images = _images; + + for (int i = 0; i < count; i++) + { + images[i].ImageView = _imageRefs[binding + i]?.Get(cbs).Value ?? default; + } + + dsc.UpdateImages(0, binding, images.Slice(0, count), DescriptorType.StorageImage); + } + else + { + Span bufferImages = _bufferImages; + + for (int i = 0; i < count; i++) + { + bufferImages[i] = _bufferImageRefs[binding + i]?.GetBufferView(cbs, _bufferImageFormats[binding + i]) ?? 
default; + } + + dsc.UpdateBufferImages(0, binding, bufferImages.Slice(0, count), DescriptorType.StorageTexelBuffer); + } + } + } + } + + var sets = dsc.GetSets(); + + _gd.Api.CmdBindDescriptorSets(cbs.CommandBuffer, pbp, _program.PipelineLayout, (uint)setIndex, 1, sets, 0, ReadOnlySpan.Empty); + } + + private unsafe void UpdateBuffers( + CommandBufferScoped cbs, + PipelineBindPoint pbp, + int baseBinding, + ReadOnlySpan bufferInfo, + DescriptorType type) + { + if (bufferInfo.Length == 0) + { + return; + } + + fixed (DescriptorBufferInfo* pBufferInfo = bufferInfo) + { + var writeDescriptorSet = new WriteDescriptorSet + { + SType = StructureType.WriteDescriptorSet, + DstBinding = (uint)baseBinding, + DescriptorType = type, + DescriptorCount = (uint)bufferInfo.Length, + PBufferInfo = pBufferInfo + }; + + _gd.PushDescriptorApi.CmdPushDescriptorSet(cbs.CommandBuffer, pbp, _program.PipelineLayout, 0, 1, &writeDescriptorSet); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void UpdateAndBindUniformBufferPd(CommandBufferScoped cbs, PipelineBindPoint pbp) + { + var dummyBuffer = _dummyBuffer?.GetBuffer(); + int stagesCount = _program.Bindings[PipelineBase.UniformSetIndex].Length; + + if (!_uniformSet[0]) + { + Span uniformBuffer = stackalloc DescriptorBufferInfo[1]; + + uniformBuffer[0] = new DescriptorBufferInfo() + { + Offset = 0, + Range = (ulong)SupportBuffer.RequiredSize, + Buffer = _gd.BufferManager.GetBuffer(cbs.CommandBuffer, _pipeline.SupportBufferUpdater.Handle, false).Get(cbs, 0, SupportBuffer.RequiredSize).Value + }; + + _uniformSet[0] = true; + + UpdateBuffers(cbs, pbp, 0, uniformBuffer, DescriptorType.UniformBuffer); + } + + for (int stageIndex = 0; stageIndex < stagesCount; stageIndex++) + { + var stageBindings = _program.Bindings[PipelineBase.UniformSetIndex][stageIndex]; + int bindingsCount = stageBindings.Length; + int count; + + for (int bindingIndex = 0; bindingIndex < bindingsCount; bindingIndex += count) + { + int binding = 
stageBindings[bindingIndex]; + count = 1; + + while (bindingIndex + count < bindingsCount && stageBindings[bindingIndex + count] == binding + count) + { + count++; + } + + bool doUpdate = false; + + for (int i = 0; i < count; i++) + { + int index = binding + i; + + if (!_uniformSet[index]) + { + UpdateBuffer(cbs, ref _uniformBuffers[index], _uniformBufferRefs[index], dummyBuffer); + _uniformSet[index] = true; + doUpdate = true; + } + } + + if (doUpdate) + { + ReadOnlySpan uniformBuffers = _uniformBuffers; + UpdateBuffers(cbs, pbp, binding, uniformBuffers.Slice(binding, count), DescriptorType.UniformBuffer); + } + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Initialize(CommandBufferScoped cbs, int setIndex, DescriptorSetCollection dsc) + { + var dummyBuffer = _dummyBuffer?.GetBuffer().Get(cbs).Value ?? default; + + uint stages = _program.Stages; + + while (stages != 0) + { + int stage = BitOperations.TrailingZeroCount(stages); + stages &= ~(1u << stage); + + if (setIndex == PipelineBase.UniformSetIndex) + { + dsc.InitializeBuffers( + 0, + 1 + stage * Constants.MaxUniformBuffersPerStage, + Constants.MaxUniformBuffersPerStage, + DescriptorType.UniformBuffer, + dummyBuffer); + } + else if (setIndex == PipelineBase.StorageSetIndex) + { + dsc.InitializeBuffers( + 0, + stage * Constants.MaxStorageBuffersPerStage, + Constants.MaxStorageBuffersPerStage, + DescriptorType.StorageBuffer, + dummyBuffer); + } + } + } + + public void SignalCommandBufferChange() + { + _dirty = DirtyFlags.All; + + Array.Clear(_uniformSet); + Array.Clear(_storageSet); + } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + _dummyTexture.Dispose(); + _dummySampler.Dispose(); + } + } + + public void Dispose() + { + Dispose(true); + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/DisposableBuffer.cs b/Ryujinx.Graphics.Vulkan/DisposableBuffer.cs new file mode 100644 index 000000000..a62b0d149 --- /dev/null +++ 
b/Ryujinx.Graphics.Vulkan/DisposableBuffer.cs
@@ -0,0 +1,24 @@
+using Silk.NET.Vulkan;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    /// <summary>
+    /// Pairs a Vulkan buffer handle with the API and device needed to destroy it.
+    /// </summary>
+    struct DisposableBuffer : System.IDisposable
+    {
+        private readonly Vk _api;
+        private readonly Device _device;
+
+        /// <summary>The wrapped buffer handle.</summary>
+        public Buffer Value { get; }
+
+        /// <summary>Stores the handle and the objects later used by <see cref="Dispose"/>.</summary>
+        public DisposableBuffer(Vk api, Device device, Buffer buffer)
+            => (_api, _device, Value) = (api, device, buffer);
+
+        /// <summary>Destroys the wrapped buffer, passing no allocation callbacks.</summary>
+        public unsafe void Dispose()
+            => _api.DestroyBuffer(_device, Value, null);
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/DisposableBufferView.cs b/Ryujinx.Graphics.Vulkan/DisposableBufferView.cs
new file mode 100644
index 000000000..41d905f12
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/DisposableBufferView.cs
@@ -0,0 +1,24 @@
+using Silk.NET.Vulkan;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    /// <summary>
+    /// Pairs a Vulkan buffer view handle with the API and device needed to destroy it.
+    /// </summary>
+    struct DisposableBufferView : System.IDisposable
+    {
+        private readonly Vk _api;
+        private readonly Device _device;
+
+        /// <summary>The wrapped buffer view handle.</summary>
+        public BufferView Value { get; }
+
+        /// <summary>Stores the handle and the objects later used by <see cref="Dispose"/>.</summary>
+        public DisposableBufferView(Vk api, Device device, BufferView bufferView)
+            => (_api, _device, Value) = (api, device, bufferView);
+
+        /// <summary>Destroys the wrapped buffer view, passing no allocation callbacks.</summary>
+        public unsafe void Dispose()
+            => _api.DestroyBufferView(_device, Value, null);
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/DisposableFramebuffer.cs b/Ryujinx.Graphics.Vulkan/DisposableFramebuffer.cs
new file mode 100644
index 000000000..f8436ddc5
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/DisposableFramebuffer.cs
@@ -0,0 +1,25 @@
+using Silk.NET.Vulkan;
+using System;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    /// <summary>
+    /// Pairs a Vulkan framebuffer handle with the API and device needed to destroy it.
+    /// </summary>
+    struct DisposableFramebuffer : IDisposable
+    {
+        private readonly Vk _api;
+        private readonly Device _device;
+
+        /// <summary>The wrapped framebuffer handle.</summary>
+        public Framebuffer Value { get; }
+
+        /// <summary>Stores the handle and the objects later used by <see cref="Dispose"/>.</summary>
+        public DisposableFramebuffer(Vk api, Device device, Framebuffer framebuffer)
+            => (_api, _device, Value) = (api, device, framebuffer);
+
+        /// <summary>Destroys the wrapped framebuffer, passing no allocation callbacks.</summary>
+        public unsafe void Dispose()
+            => _api.DestroyFramebuffer(_device, Value, null);
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/DisposableImage.cs
b/Ryujinx.Graphics.Vulkan/DisposableImage.cs
new file mode 100644
index 000000000..d10cb7f8e
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/DisposableImage.cs
@@ -0,0 +1,25 @@
+using Silk.NET.Vulkan;
+using System;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    /// <summary>
+    /// Pairs a Vulkan image handle with the API and device needed to destroy it.
+    /// </summary>
+    struct DisposableImage : IDisposable
+    {
+        private readonly Vk _api;
+        private readonly Device _device;
+
+        /// <summary>The wrapped image handle.</summary>
+        public Image Value { get; }
+
+        /// <summary>Stores the handle and the objects later used by <see cref="Dispose"/>.</summary>
+        public DisposableImage(Vk api, Device device, Image image)
+            => (_api, _device, Value) = (api, device, image);
+
+        /// <summary>Destroys the wrapped image, passing no allocation callbacks.</summary>
+        public unsafe void Dispose()
+            => _api.DestroyImage(_device, Value, null);
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/DisposableImageView.cs b/Ryujinx.Graphics.Vulkan/DisposableImageView.cs
new file mode 100644
index 000000000..e74230f8e
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/DisposableImageView.cs
@@ -0,0 +1,25 @@
+using Silk.NET.Vulkan;
+using System;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    /// <summary>
+    /// Pairs a Vulkan image view handle with the API and device needed to destroy it.
+    /// </summary>
+    struct DisposableImageView : IDisposable
+    {
+        private readonly Vk _api;
+        private readonly Device _device;
+
+        /// <summary>The wrapped image view handle.</summary>
+        public ImageView Value { get; }
+
+        /// <summary>Stores the handle and the objects later used by <see cref="Dispose"/>.</summary>
+        public DisposableImageView(Vk api, Device device, ImageView imageView)
+            => (_api, _device, Value) = (api, device, imageView);
+
+        /// <summary>Destroys the wrapped image view, passing no allocation callbacks.</summary>
+        public unsafe void Dispose()
+            => _api.DestroyImageView(_device, Value, null);
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/DisposableMemory.cs b/Ryujinx.Graphics.Vulkan/DisposableMemory.cs
new file mode 100644
index 000000000..2dedb5842
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/DisposableMemory.cs
@@ -0,0 +1,24 @@
+using Silk.NET.Vulkan;
+using System;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    /// <summary>
+    /// Pairs a Vulkan device memory handle with the API and device needed
+    /// to free it.
+    /// </summary>
+    struct DisposableMemory : IDisposable
+    {
+        private readonly Vk _api;
+        private readonly Device _device;
+        private readonly DeviceMemory _memory;
+
+        /// <summary>Stores the handle and the objects later used by <see cref="Dispose"/>.</summary>
+        public DisposableMemory(Vk api, Device device, DeviceMemory memory)
+            => (_api, _device, _memory) = (api, device, memory);
+
+        /// <summary>Frees the wrapped device memory, passing no allocation callbacks.</summary>
+        public unsafe void Dispose()
+            => _api.FreeMemory(_device, _memory, null);
+    }
+}
diff --git
a/Ryujinx.Graphics.Vulkan/DisposablePipeline.cs b/Ryujinx.Graphics.Vulkan/DisposablePipeline.cs new file mode 100644 index 000000000..2cbca42d9 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/DisposablePipeline.cs @@ -0,0 +1,25 @@ +using Silk.NET.Vulkan; +using System; + +namespace Ryujinx.Graphics.Vulkan +{ + struct DisposablePipeline : IDisposable + { + private readonly Vk _api; + private readonly Device _device; + + public Pipeline Value { get; } + + public DisposablePipeline(Vk api, Device device, Pipeline pipeline) + { + _api = api; + _device = device; + Value = pipeline; + } + + public unsafe void Dispose() + { + _api.DestroyPipeline(_device, Value, null); + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/DisposableRenderPass.cs b/Ryujinx.Graphics.Vulkan/DisposableRenderPass.cs new file mode 100644 index 000000000..e3f0d0e65 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/DisposableRenderPass.cs @@ -0,0 +1,25 @@ +using Silk.NET.Vulkan; +using System; + +namespace Ryujinx.Graphics.Vulkan +{ + struct DisposableRenderPass : IDisposable + { + private readonly Vk _api; + private readonly Device _device; + + public RenderPass Value { get; } + + public DisposableRenderPass(Vk api, Device device, RenderPass renderPass) + { + _api = api; + _device = device; + Value = renderPass; + } + + public unsafe void Dispose() + { + _api.DestroyRenderPass(_device, Value, null); + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/DisposableSampler.cs b/Ryujinx.Graphics.Vulkan/DisposableSampler.cs new file mode 100644 index 000000000..89b964da4 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/DisposableSampler.cs @@ -0,0 +1,25 @@ +using Silk.NET.Vulkan; +using System; + +namespace Ryujinx.Graphics.Vulkan +{ + struct DisposableSampler : IDisposable + { + private readonly Vk _api; + private readonly Device _device; + + public Sampler Value { get; } + + public DisposableSampler(Vk api, Device device, Sampler sampler) + { + _api = api; + _device = device; + Value = sampler; + } + + public unsafe void 
Dispose() + { + _api.DestroySampler(_device, Value, null); + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/EnumConversion.cs b/Ryujinx.Graphics.Vulkan/EnumConversion.cs new file mode 100644 index 000000000..a342b18d8 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/EnumConversion.cs @@ -0,0 +1,520 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Shader; +using Silk.NET.Vulkan; + +namespace Ryujinx.Graphics.Vulkan +{ + static class EnumConversion + { + public static ShaderStageFlags Convert(this ShaderStage stage) + { + switch (stage) + { + case ShaderStage.Vertex: + return ShaderStageFlags.ShaderStageVertexBit; + case ShaderStage.Geometry: + return ShaderStageFlags.ShaderStageGeometryBit; + case ShaderStage.TessellationControl: + return ShaderStageFlags.ShaderStageTessellationControlBit; + case ShaderStage.TessellationEvaluation: + return ShaderStageFlags.ShaderStageTessellationEvaluationBit; + case ShaderStage.Fragment: + return ShaderStageFlags.ShaderStageFragmentBit; + case ShaderStage.Compute: + return ShaderStageFlags.ShaderStageComputeBit; + }; + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(ShaderStage)} enum value: {stage}."); + + return 0; + } + + public static PipelineStageFlags ConvertToPipelineStageFlags(this ShaderStage stage) + { + switch (stage) + { + case ShaderStage.Vertex: + return PipelineStageFlags.PipelineStageVertexShaderBit; + case ShaderStage.Geometry: + return PipelineStageFlags.PipelineStageGeometryShaderBit; + case ShaderStage.TessellationControl: + return PipelineStageFlags.PipelineStageTessellationControlShaderBit; + case ShaderStage.TessellationEvaluation: + return PipelineStageFlags.PipelineStageTessellationEvaluationShaderBit; + case ShaderStage.Fragment: + return PipelineStageFlags.PipelineStageFragmentShaderBit; + case ShaderStage.Compute: + return PipelineStageFlags.PipelineStageComputeShaderBit; + }; + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(ShaderStage)} enum value: 
{stage}."); + + return 0; + } + + public static SamplerAddressMode Convert(this AddressMode mode) + { + switch (mode) + { + case AddressMode.Clamp: + return SamplerAddressMode.ClampToEdge; // TODO: Should be clamp. + case AddressMode.Repeat: + return SamplerAddressMode.Repeat; + case AddressMode.MirrorClamp: + return SamplerAddressMode.ClampToEdge; // TODO: Should be mirror clamp. + case AddressMode.MirrorClampToEdge: + return SamplerAddressMode.MirrorClampToEdgeKhr; + case AddressMode.MirrorClampToBorder: + return SamplerAddressMode.ClampToBorder; // TODO: Should be mirror clamp to border. + case AddressMode.ClampToBorder: + return SamplerAddressMode.ClampToBorder; + case AddressMode.MirroredRepeat: + return SamplerAddressMode.MirroredRepeat; + case AddressMode.ClampToEdge: + return SamplerAddressMode.ClampToEdge; + } + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(AddressMode)} enum value: {mode}."); + + return SamplerAddressMode.ClampToEdge; // TODO: Should be clamp. + } + + public static Silk.NET.Vulkan.BlendFactor Convert(this GAL.BlendFactor factor) + { + switch (factor) + { + case GAL.BlendFactor.Zero: + case GAL.BlendFactor.ZeroGl: + return Silk.NET.Vulkan.BlendFactor.Zero; + case GAL.BlendFactor.One: + case GAL.BlendFactor.OneGl: + return Silk.NET.Vulkan.BlendFactor.One; + case GAL.BlendFactor.SrcColor: + case GAL.BlendFactor.SrcColorGl: + return Silk.NET.Vulkan.BlendFactor.SrcColor; + case GAL.BlendFactor.OneMinusSrcColor: + case GAL.BlendFactor.OneMinusSrcColorGl: + return Silk.NET.Vulkan.BlendFactor.OneMinusSrcColor; + case GAL.BlendFactor.SrcAlpha: + case GAL.BlendFactor.SrcAlphaGl: + return Silk.NET.Vulkan.BlendFactor.SrcAlpha; + case GAL.BlendFactor.OneMinusSrcAlpha: + case GAL.BlendFactor.OneMinusSrcAlphaGl: + return Silk.NET.Vulkan.BlendFactor.OneMinusSrcAlpha; + case GAL.BlendFactor.DstAlpha: + case GAL.BlendFactor.DstAlphaGl: + return Silk.NET.Vulkan.BlendFactor.DstAlpha; + case GAL.BlendFactor.OneMinusDstAlpha: + case 
GAL.BlendFactor.OneMinusDstAlphaGl: + return Silk.NET.Vulkan.BlendFactor.OneMinusDstAlpha; + case GAL.BlendFactor.DstColor: + case GAL.BlendFactor.DstColorGl: + return Silk.NET.Vulkan.BlendFactor.DstColor; + case GAL.BlendFactor.OneMinusDstColor: + case GAL.BlendFactor.OneMinusDstColorGl: + return Silk.NET.Vulkan.BlendFactor.OneMinusDstColor; + case GAL.BlendFactor.SrcAlphaSaturate: + case GAL.BlendFactor.SrcAlphaSaturateGl: + return Silk.NET.Vulkan.BlendFactor.SrcAlphaSaturate; + case GAL.BlendFactor.Src1Color: + case GAL.BlendFactor.Src1ColorGl: + return Silk.NET.Vulkan.BlendFactor.Src1Color; + case GAL.BlendFactor.OneMinusSrc1Color: + case GAL.BlendFactor.OneMinusSrc1ColorGl: + return Silk.NET.Vulkan.BlendFactor.OneMinusSrc1Color; + case GAL.BlendFactor.Src1Alpha: + case GAL.BlendFactor.Src1AlphaGl: + return Silk.NET.Vulkan.BlendFactor.Src1Alpha; + case GAL.BlendFactor.OneMinusSrc1Alpha: + case GAL.BlendFactor.OneMinusSrc1AlphaGl: + return Silk.NET.Vulkan.BlendFactor.OneMinusSrc1Alpha; + case GAL.BlendFactor.ConstantColor: + return Silk.NET.Vulkan.BlendFactor.ConstantColor; + case GAL.BlendFactor.OneMinusConstantColor: + return Silk.NET.Vulkan.BlendFactor.OneMinusConstantColor; + case GAL.BlendFactor.ConstantAlpha: + return Silk.NET.Vulkan.BlendFactor.ConstantAlpha; + case GAL.BlendFactor.OneMinusConstantAlpha: + return Silk.NET.Vulkan.BlendFactor.OneMinusConstantAlpha; + } + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(GAL.BlendFactor)} enum value: {factor}."); + + return Silk.NET.Vulkan.BlendFactor.Zero; + } + + public static Silk.NET.Vulkan.BlendOp Convert(this GAL.BlendOp op) + { + switch (op) + { + case GAL.BlendOp.Add: + case GAL.BlendOp.AddGl: + return Silk.NET.Vulkan.BlendOp.Add; + case GAL.BlendOp.Subtract: + case GAL.BlendOp.SubtractGl: + return Silk.NET.Vulkan.BlendOp.Subtract; + case GAL.BlendOp.ReverseSubtract: + case GAL.BlendOp.ReverseSubtractGl: + return Silk.NET.Vulkan.BlendOp.ReverseSubtract; + case GAL.BlendOp.Minimum: + case 
GAL.BlendOp.MinimumGl: + return Silk.NET.Vulkan.BlendOp.Min; + case GAL.BlendOp.Maximum: + case GAL.BlendOp.MaximumGl: + return Silk.NET.Vulkan.BlendOp.Max; + } + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(GAL.BlendOp)} enum value: {op}."); + + return Silk.NET.Vulkan.BlendOp.Add; + } + + public static Silk.NET.Vulkan.CompareOp Convert(this GAL.CompareOp op) + { + switch (op) + { + case GAL.CompareOp.Never: + case GAL.CompareOp.NeverGl: + return Silk.NET.Vulkan.CompareOp.Never; + case GAL.CompareOp.Less: + case GAL.CompareOp.LessGl: + return Silk.NET.Vulkan.CompareOp.Less; + case GAL.CompareOp.Equal: + case GAL.CompareOp.EqualGl: + return Silk.NET.Vulkan.CompareOp.Equal; + case GAL.CompareOp.LessOrEqual: + case GAL.CompareOp.LessOrEqualGl: + return Silk.NET.Vulkan.CompareOp.LessOrEqual; + case GAL.CompareOp.Greater: + case GAL.CompareOp.GreaterGl: + return Silk.NET.Vulkan.CompareOp.Greater; + case GAL.CompareOp.NotEqual: + case GAL.CompareOp.NotEqualGl: + return Silk.NET.Vulkan.CompareOp.NotEqual; + case GAL.CompareOp.GreaterOrEqual: + case GAL.CompareOp.GreaterOrEqualGl: + return Silk.NET.Vulkan.CompareOp.GreaterOrEqual; + case GAL.CompareOp.Always: + case GAL.CompareOp.AlwaysGl: + return Silk.NET.Vulkan.CompareOp.Always; + } + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(GAL.CompareOp)} enum value: {op}."); + + return Silk.NET.Vulkan.CompareOp.Never; + } + + public static CullModeFlags Convert(this Face face) + { + switch (face) + { + case Face.Back: + return CullModeFlags.CullModeBackBit; + case Face.Front: + return CullModeFlags.CullModeFrontBit; + case Face.FrontAndBack: + return CullModeFlags.CullModeFrontAndBack; + } + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(Face)} enum value: {face}."); + + return CullModeFlags.CullModeBackBit; + } + + public static Silk.NET.Vulkan.FrontFace Convert(this GAL.FrontFace frontFace) + { + // Flipped to account for origin differences. 
+ switch (frontFace) + { + case GAL.FrontFace.Clockwise: + return Silk.NET.Vulkan.FrontFace.CounterClockwise; + case GAL.FrontFace.CounterClockwise: + return Silk.NET.Vulkan.FrontFace.Clockwise; + } + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(GAL.FrontFace)} enum value: {frontFace}."); + + return Silk.NET.Vulkan.FrontFace.Clockwise; + } + + public static Silk.NET.Vulkan.IndexType Convert(this GAL.IndexType type) + { + switch (type) + { + case GAL.IndexType.UByte: + return Silk.NET.Vulkan.IndexType.Uint8Ext; + case GAL.IndexType.UShort: + return Silk.NET.Vulkan.IndexType.Uint16; + case GAL.IndexType.UInt: + return Silk.NET.Vulkan.IndexType.Uint32; + } + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(GAL.IndexType)} enum value: {type}."); + + return Silk.NET.Vulkan.IndexType.Uint16; + } + + public static Filter Convert(this MagFilter filter) + { + switch (filter) + { + case MagFilter.Nearest: + return Filter.Nearest; + case MagFilter.Linear: + return Filter.Linear; + } + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(MagFilter)} enum value: {filter}."); + + return Filter.Nearest; + } + + public static (Filter, SamplerMipmapMode) Convert(this MinFilter filter) + { + switch (filter) + { + case MinFilter.Nearest: + return (Filter.Nearest, SamplerMipmapMode.Nearest); + case MinFilter.Linear: + return (Filter.Linear, SamplerMipmapMode.Nearest); + case MinFilter.NearestMipmapNearest: + return (Filter.Nearest, SamplerMipmapMode.Nearest); + case MinFilter.LinearMipmapNearest: + return (Filter.Linear, SamplerMipmapMode.Nearest); + case MinFilter.NearestMipmapLinear: + return (Filter.Nearest, SamplerMipmapMode.Linear); + case MinFilter.LinearMipmapLinear: + return (Filter.Linear, SamplerMipmapMode.Linear); + } + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(MinFilter)} enum value: {filter}."); + + return (Filter.Nearest, SamplerMipmapMode.Nearest); + } + + public static Silk.NET.Vulkan.PrimitiveTopology Convert(this GAL.PrimitiveTopology 
topology) + { + switch (topology) + { + case GAL.PrimitiveTopology.Points: + return Silk.NET.Vulkan.PrimitiveTopology.PointList; + case GAL.PrimitiveTopology.Lines: + return Silk.NET.Vulkan.PrimitiveTopology.LineList; + case GAL.PrimitiveTopology.LineStrip: + return Silk.NET.Vulkan.PrimitiveTopology.LineStrip; + case GAL.PrimitiveTopology.Triangles: + return Silk.NET.Vulkan.PrimitiveTopology.TriangleList; + case GAL.PrimitiveTopology.TriangleStrip: + return Silk.NET.Vulkan.PrimitiveTopology.TriangleStrip; + case GAL.PrimitiveTopology.TriangleFan: + return Silk.NET.Vulkan.PrimitiveTopology.TriangleFan; + case GAL.PrimitiveTopology.LinesAdjacency: + return Silk.NET.Vulkan.PrimitiveTopology.LineListWithAdjacency; + case GAL.PrimitiveTopology.LineStripAdjacency: + return Silk.NET.Vulkan.PrimitiveTopology.LineStripWithAdjacency; + case GAL.PrimitiveTopology.TrianglesAdjacency: + return Silk.NET.Vulkan.PrimitiveTopology.TriangleListWithAdjacency; + case GAL.PrimitiveTopology.TriangleStripAdjacency: + return Silk.NET.Vulkan.PrimitiveTopology.TriangleStripWithAdjacency; + case GAL.PrimitiveTopology.Patches: + return Silk.NET.Vulkan.PrimitiveTopology.PatchList; + case GAL.PrimitiveTopology.Quads: // Emulated with triangle fans. + return Silk.NET.Vulkan.PrimitiveTopology.TriangleFan; + case GAL.PrimitiveTopology.QuadStrip: // Emulated with triangle strips. 
+ return Silk.NET.Vulkan.PrimitiveTopology.TriangleStrip; + } + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(GAL.PrimitiveTopology)} enum value: {topology}."); + + return Silk.NET.Vulkan.PrimitiveTopology.TriangleList; + } + + public static Silk.NET.Vulkan.StencilOp Convert(this GAL.StencilOp op) + { + switch (op) + { + case GAL.StencilOp.Keep: + case GAL.StencilOp.KeepGl: + return Silk.NET.Vulkan.StencilOp.Keep; + case GAL.StencilOp.Zero: + case GAL.StencilOp.ZeroGl: + return Silk.NET.Vulkan.StencilOp.Zero; + case GAL.StencilOp.Replace: + case GAL.StencilOp.ReplaceGl: + return Silk.NET.Vulkan.StencilOp.Replace; + case GAL.StencilOp.IncrementAndClamp: + case GAL.StencilOp.IncrementAndClampGl: + return Silk.NET.Vulkan.StencilOp.IncrementAndClamp; + case GAL.StencilOp.DecrementAndClamp: + case GAL.StencilOp.DecrementAndClampGl: + return Silk.NET.Vulkan.StencilOp.DecrementAndClamp; + case GAL.StencilOp.Invert: + case GAL.StencilOp.InvertGl: + return Silk.NET.Vulkan.StencilOp.Invert; + case GAL.StencilOp.IncrementAndWrap: + case GAL.StencilOp.IncrementAndWrapGl: + return Silk.NET.Vulkan.StencilOp.IncrementAndWrap; + case GAL.StencilOp.DecrementAndWrap: + case GAL.StencilOp.DecrementAndWrapGl: + return Silk.NET.Vulkan.StencilOp.DecrementAndWrap; + } + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(GAL.StencilOp)} enum value: {op}."); + + return Silk.NET.Vulkan.StencilOp.Keep; + } + + public static ComponentSwizzle Convert(this SwizzleComponent swizzleComponent) + { + switch (swizzleComponent) + { + case SwizzleComponent.Zero: + return ComponentSwizzle.Zero; + case SwizzleComponent.One: + return ComponentSwizzle.One; + case SwizzleComponent.Red: + return ComponentSwizzle.R; + case SwizzleComponent.Green: + return ComponentSwizzle.G; + case SwizzleComponent.Blue: + return ComponentSwizzle.B; + case SwizzleComponent.Alpha: + return ComponentSwizzle.A; + } + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(SwizzleComponent)} enum value: 
{swizzleComponent}."); + + return ComponentSwizzle.Zero; + } + + public static ImageType Convert(this Target target) + { + switch (target) + { + case Target.Texture1D: + case Target.Texture1DArray: + case Target.TextureBuffer: + return ImageType.ImageType1D; + case Target.Texture2D: + case Target.Texture2DArray: + case Target.Texture2DMultisample: + case Target.Cubemap: + case Target.CubemapArray: + return ImageType.ImageType2D; + case Target.Texture3D: + return ImageType.ImageType3D; + } + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(Target)} enum value: {target}."); + + return ImageType.ImageType2D; + } + + public static ImageViewType ConvertView(this Target target) + { + switch (target) + { + case Target.Texture1D: + return ImageViewType.ImageViewType1D; + case Target.Texture2D: + case Target.Texture2DMultisample: + return ImageViewType.ImageViewType2D; + case Target.Texture3D: + return ImageViewType.ImageViewType3D; + case Target.Texture1DArray: + return ImageViewType.ImageViewType1DArray; + case Target.Texture2DArray: + return ImageViewType.ImageViewType2DArray; + case Target.Cubemap: + return ImageViewType.Cube; + case Target.CubemapArray: + return ImageViewType.CubeArray; + } + + Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(Target)} enum value: {target}."); + + return ImageViewType.ImageViewType2D; + } + + public static ImageAspectFlags ConvertAspectFlags(this GAL.Format format) + { + switch (format) + { + case GAL.Format.D16Unorm: + case GAL.Format.D32Float: + return ImageAspectFlags.ImageAspectDepthBit; + case GAL.Format.S8Uint: + + return ImageAspectFlags.ImageAspectStencilBit; + case GAL.Format.D24UnormS8Uint: + case GAL.Format.D32FloatS8Uint: + case GAL.Format.S8UintD24Unorm: + return ImageAspectFlags.ImageAspectDepthBit | ImageAspectFlags.ImageAspectStencilBit; + default: + return ImageAspectFlags.ImageAspectColorBit; + } + } + + public static ImageAspectFlags ConvertAspectFlags(this GAL.Format format, DepthStencilMode depthStencilMode) 
+        {
+            switch (format)
+            {
+                case GAL.Format.D16Unorm:
+                case GAL.Format.D32Float:
+                    return ImageAspectFlags.ImageAspectDepthBit;
+                case GAL.Format.S8Uint:
+                    return ImageAspectFlags.ImageAspectStencilBit;
+                case GAL.Format.D24UnormS8Uint:
+                case GAL.Format.D32FloatS8Uint:
+                case GAL.Format.S8UintD24Unorm:
+                    return depthStencilMode == DepthStencilMode.Stencil ? ImageAspectFlags.ImageAspectStencilBit : ImageAspectFlags.ImageAspectDepthBit;
+                default:
+                    return ImageAspectFlags.ImageAspectColorBit;
+            }
+        }
+
+        public static LogicOp Convert(this LogicalOp op)
+        {
+            switch (op)
+            {
+                case LogicalOp.Clear:
+                    return LogicOp.Clear;
+                case LogicalOp.And:
+                    return LogicOp.And;
+                case LogicalOp.AndReverse:
+                    return LogicOp.AndReverse;
+                case LogicalOp.Copy:
+                    return LogicOp.Copy;
+                case LogicalOp.AndInverted:
+                    return LogicOp.AndInverted;
+                case LogicalOp.Noop:
+                    return LogicOp.NoOp;
+                case LogicalOp.Xor:
+                    return LogicOp.Xor;
+                case LogicalOp.Or:
+                    return LogicOp.Or;
+                case LogicalOp.Nor:
+                    return LogicOp.Nor;
+                case LogicalOp.Equiv:
+                    return LogicOp.Equivalent;
+                case LogicalOp.Invert:
+                    return LogicOp.Invert;
+                case LogicalOp.OrReverse:
+                    return LogicOp.OrReverse;
+                case LogicalOp.CopyInverted:
+                    return LogicOp.CopyInverted;
+                case LogicalOp.OrInverted:
+                    return LogicOp.OrInverted;
+                case LogicalOp.Nand:
+                    return LogicOp.Nand;
+                case LogicalOp.Set:
+                    return LogicOp.Set;
+            }
+
+            Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(LogicalOp)} enum value: {op}.");
+
+            return LogicOp.Copy;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/FenceHelper.cs b/Ryujinx.Graphics.Vulkan/FenceHelper.cs
new file mode 100644
index 000000000..d6731c0eb
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/FenceHelper.cs
@@ -0,0 +1,42 @@
+using Silk.NET.Vulkan;
+using System;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    /// <summary>
+    /// Helpers for polling and waiting on Vulkan fences.
+    /// </summary>
+    static class FenceHelper
+    {
+        private const ulong DefaultTimeout = 100000000; // 100ms
+
+        /// <summary>
+        /// Returns true if at least one fence is signaled within <paramref name="timeout"/> (0 polls without waiting).
+        /// </summary>
+        public static bool AnySignaled(Vk api, Device device, ReadOnlySpan<Fence> fences, ulong timeout = 0)
+        {
+            return api.WaitForFences(device, (uint)fences.Length, fences, false, timeout) == Result.Success;
+        }
+
+        /// <summary>
+        /// Returns true if every fence is signaled within <paramref name="timeout"/> (0 polls without waiting).
+        /// </summary>
+        public static bool AllSignaled(Vk api, Device device, ReadOnlySpan<Fence> fences, ulong timeout = 0)
+        {
+            return api.WaitForFences(device, (uint)fences.Length, fences, true, timeout) == Result.Success;
+        }
+
+        /// <summary>
+        /// Blocks until every fence is signaled, re-issuing the wait each time it times out; any other failure throws.
+        /// </summary>
+        public static void WaitAllIndefinitely(Vk api, Device device, ReadOnlySpan<Fence> fences)
+        {
+            Result result;
+            while ((result = api.WaitForFences(device, (uint)fences.Length, fences, true, DefaultTimeout)) == Result.Timeout)
+            {
+                // Keep waiting while the fence is not signaled.
+            }
+            result.ThrowOnError();
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/FenceHolder.cs b/Ryujinx.Graphics.Vulkan/FenceHolder.cs
new file mode 100644
index 000000000..ba5b18829
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/FenceHolder.cs
@@ -0,0 +1,97 @@
+using Silk.NET.Vulkan;
+using System;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    /// <summary>
+    /// Reference-counted owner of a Vulkan fence; the fence is destroyed when the count reaches zero.
+    /// </summary>
+    class FenceHolder : IDisposable
+    {
+        private readonly Vk _api;
+        private readonly Device _device;
+        private Fence _fence;
+        private int _referenceCount;
+        public bool _disposed;
+
+        /// <summary>
+        /// Creates the fence and takes the initial reference, which <see cref="Dispose"/> releases.
+        /// </summary>
+        public unsafe FenceHolder(Vk api, Device device)
+        {
+            _api = api;
+            _device = device;
+
+            var fenceCreateInfo = new FenceCreateInfo()
+            {
+                SType = StructureType.FenceCreateInfo
+            };
+
+            api.CreateFence(device, in fenceCreateInfo, null, out _fence).ThrowOnError();
+
+            _referenceCount = 1;
+        }
+
+        /// <summary>
+        /// Returns the fence without taking a reference.
+        /// </summary>
+        public Fence GetUnsafe()
+        {
+            return _fence;
+        }
+
+        /// <summary>
+        /// Takes a reference and returns the fence; pair every call with <see cref="Put"/>.
+        /// </summary>
+        public Fence Get()
+        {
+            Interlocked.Increment(ref _referenceCount);
+            return _fence;
+        }
+
+        /// <summary>
+        /// Releases one reference, destroying the fence when the last one is released.
+        /// </summary>
+        public unsafe void Put()
+        {
+            if (Interlocked.Decrement(ref _referenceCount) == 0)
+            {
+                _api.DestroyFence(_device, _fence, null);
+                _fence = default;
+            }
+        }
+
+        /// <summary>
+        /// Blocks until the fence is signaled.
+        /// </summary>
+        public void Wait()
+        {
+            Span<Fence> fences = stackalloc Fence[1];
+            fences[0] = _fence;
+            FenceHelper.WaitAllIndefinitely(_api, _device, fences);
+        }
+
+        /// <summary>
+        /// Polls the fence once (zero timeout) and reports whether it is signaled.
+        /// </summary>
+        public bool IsSignaled()
+        {
+            Span<Fence> fences = stackalloc Fence[1];
+            fences[0] = _fence;
+            return FenceHelper.AllSignaled(_api, _device, fences);
+        }
+
+        /// <summary>
+        /// Releases the initial reference; only the first call has an effect.
+        /// </summary>
+        public void Dispose()
+        {
+            if (!_disposed)
+            {
+                Put();
+                _disposed = true;
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/FormatCapabilities.cs b/Ryujinx.Graphics.Vulkan/FormatCapabilities.cs
new file mode 100644
index 000000000..6159f2cca
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/FormatCapabilities.cs
@@ -0,0 +1,108 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Silk.NET.Vulkan;
+using System;
+using VkFormat = Silk.NET.Vulkan.Format;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    /// <summary>
+    /// Queries and caches the optimal-tiling format features of a physical device, per GAL format.
+    /// </summary>
+    class FormatCapabilities
+    {
+        private readonly FormatFeatureFlags[] _table;
+
+        private readonly Vk _api;
+        private readonly PhysicalDevice _physicalDevice;
+
+        public FormatCapabilities(Vk api, PhysicalDevice physicalDevice)
+        {
+            _api = api;
+            _physicalDevice = physicalDevice;
+            _table = new FormatFeatureFlags[Enum.GetNames(typeof(GAL.Format)).Length];
+        }
+
+        /// <summary>
+        /// Returns true only if every format in <paramref name="formats"/> supports all of <paramref name="flags"/>.
+        /// </summary>
+        public bool FormatsSupports(FormatFeatureFlags flags, params GAL.Format[] formats)
+        {
+            foreach (GAL.Format format in formats)
+            {
+                if (!FormatSupports(flags, format))
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Returns true if <paramref name="format"/> supports all of <paramref name="flags"/> with optimal tiling.
+        /// </summary>
+        public bool FormatSupports(FormatFeatureFlags flags, GAL.Format format)
+        {
+            var formatFeatureFlags = _table[(int)format];
+
+            // A zero entry doubles as "not queried yet", so a format that reports no
+            // features at all is queried again on every call.
+            if (formatFeatureFlags == 0)
+            {
+                _api.GetPhysicalDeviceFormatProperties(_physicalDevice, FormatTable.GetFormat(format), out var fp);
+                formatFeatureFlags = fp.OptimalTilingFeatures;
+                _table[(int)format] = formatFeatureFlags;
+            }
+
+            return (formatFeatureFlags & flags) == flags;
+        }
+
+        /// <summary>
+        /// Maps a GAL format to the Vulkan format to use on this device. D24S8 formats fall back to
+        /// D32SfloatS8Uint when unsupported; any other unsupported format is logged and returned as mapped.
+        /// </summary>
+        public VkFormat ConvertToVkFormat(GAL.Format srcFormat)
+        {
+            var format = FormatTable.GetFormat(srcFormat);
+
+            var requiredFeatures = FormatFeatureFlags.FormatFeatureSampledImageBit |
+                                   FormatFeatureFlags.FormatFeatureTransferSrcBit |
+                                   FormatFeatureFlags.FormatFeatureTransferDstBit;
+
+            if (srcFormat.IsDepthOrStencil())
+            {
+                requiredFeatures |= FormatFeatureFlags.FormatFeatureDepthStencilAttachmentBit;
+            }
+            else if
(srcFormat.IsRtColorCompatible())
+            {
+                requiredFeatures |= FormatFeatureFlags.FormatFeatureColorAttachmentBit;
+            }
+
+            if (srcFormat.IsImageCompatible())
+            {
+                requiredFeatures |= FormatFeatureFlags.FormatFeatureStorageImageBit;
+            }
+
+            if (!FormatSupports(requiredFeatures, srcFormat) || (IsD24S8(srcFormat) && VulkanConfiguration.ForceD24S8Unsupported))
+            {
+                // The format is not supported. Can we convert it to a higher precision format?
+                if (IsD24S8(srcFormat))
+                {
+                    format = VkFormat.D32SfloatS8Uint;
+                }
+                else
+                {
+                    Logger.Error?.Print(LogClass.Gpu, $"Format {srcFormat} is not supported by the host.");
+                }
+            }
+
+            return format;
+        }
+
+        public static bool IsD24S8(GAL.Format format)
+        {
+            return format == GAL.Format.D24UnormS8Uint || format == GAL.Format.S8UintD24Unorm;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/FormatConverter.cs b/Ryujinx.Graphics.Vulkan/FormatConverter.cs
new file mode 100644
index 000000000..e42bd778a
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/FormatConverter.cs
@@ -0,0 +1,64 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    /// <summary>
+    /// CPU conversions between packed D24S8 texels and two-word D32FS8 texels.
+    /// </summary>
+    class FormatConverter
+    {
+        /// <summary>
+        /// Expands each 32-bit D24S8 word (depth in the upper 24 bits, stencil in the low 8) into
+        /// two 32-bit words: the depth as a float, then the stencil.
+        /// </summary>
+        public unsafe static void ConvertD24S8ToD32FS8(Span<byte> output, ReadOnlySpan<byte> input)
+        {
+            const float UnormToFloat = 1f / 0xffffff;
+
+            Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output);
+            ReadOnlySpan<uint> inputUint = MemoryMarshal.Cast<byte, uint>(input);
+
+            int i = 0;
+
+            for (; i < inputUint.Length; i++)
+            {
+                uint depthStencil = inputUint[i];
+                uint depth = depthStencil >> 8;
+                uint stencil = depthStencil & 0xff;
+
+                int j = i * 2;
+
+                outputUint[j] = (uint)BitConverter.SingleToInt32Bits(depth * UnormToFloat);
+                outputUint[j + 1] = stencil;
+            }
+        }
+
+        /// <summary>
+        /// Packs each pair of 32-bit words (float depth, stencil) into one D24S8 word, clamping the
+        /// depth to [0, 1] and keeping the low 8 bits of the stencil.
+        /// </summary>
+        public unsafe static void ConvertD32FS8ToD24S8(Span<byte> output, ReadOnlySpan<byte> input)
+        {
+            Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output);
+            ReadOnlySpan<uint> inputUint = MemoryMarshal.Cast<byte, uint>(input);
+
+            int i = 0;
+
+            for (; i < inputUint.Length; i += 2)
+            {
+                float depth = BitConverter.Int32BitsToSingle((int)inputUint[i]);
+                uint stencil = inputUint[i + 1];
+
+                // Clamp while still a float: converting a negative, NaN or oversized float to uint
+                // gives an unspecified result, so clamping after the cast is too late.
+                float clampedDepth = depth > 0f ? Math.Min(depth, 1f) : 0f;
+                uint depthStencil = ((uint)(clampedDepth * 0xffffff) << 8) | (stencil & 0xff);
+
+                int j = i >> 1;
+
+                outputUint[j] = depthStencil;
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/FormatTable.cs b/Ryujinx.Graphics.Vulkan/FormatTable.cs
new file mode 100644
index 000000000..439d492ce
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/FormatTable.cs
@@ -0,0 +1,182 @@
+using Ryujinx.Graphics.GAL;
+using System;
+using VkFormat = Silk.NET.Vulkan.Format;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    static class FormatTable
+    {
+        private static readonly VkFormat[] Table;
+
+        static FormatTable()
+        {
+            Table = new VkFormat[Enum.GetNames(typeof(Format)).Length];
+
+            Add(Format.R8Unorm, VkFormat.R8Unorm);
+            Add(Format.R8Snorm, VkFormat.R8SNorm);
+            Add(Format.R8Uint, VkFormat.R8Uint);
+            Add(Format.R8Sint, VkFormat.R8Sint);
+            Add(Format.R16Float, VkFormat.R16Sfloat);
+            Add(Format.R16Unorm, VkFormat.R16Unorm);
+            Add(Format.R16Snorm, VkFormat.R16SNorm);
+            Add(Format.R16Uint, VkFormat.R16Uint);
+            Add(Format.R16Sint, VkFormat.R16Sint);
+            Add(Format.R32Float, VkFormat.R32Sfloat);
+            Add(Format.R32Uint, VkFormat.R32Uint);
+            Add(Format.R32Sint, VkFormat.R32Sint);
+            Add(Format.R8G8Unorm, VkFormat.R8G8Unorm);
+            Add(Format.R8G8Snorm, VkFormat.R8G8SNorm);
+            Add(Format.R8G8Uint, VkFormat.R8G8Uint);
+            Add(Format.R8G8Sint, VkFormat.R8G8Sint);
+            Add(Format.R16G16Float, VkFormat.R16G16Sfloat);
+            Add(Format.R16G16Unorm, VkFormat.R16G16Unorm);
+            Add(Format.R16G16Snorm, VkFormat.R16G16SNorm);
+            Add(Format.R16G16Uint, VkFormat.R16G16Uint);
+            Add(Format.R16G16Sint, VkFormat.R16G16Sint);
+            Add(Format.R32G32Float, VkFormat.R32G32Sfloat);
+            Add(Format.R32G32Uint, VkFormat.R32G32Uint);
+            Add(Format.R32G32Sint, VkFormat.R32G32Sint);
+            Add(Format.R8G8B8Unorm, VkFormat.R8G8B8Unorm);
+            Add(Format.R8G8B8Snorm, VkFormat.R8G8B8SNorm);
+            Add(Format.R8G8B8Uint, VkFormat.R8G8B8Uint);
+            Add(Format.R8G8B8Sint,
VkFormat.R8G8B8Sint); + Add(Format.R16G16B16Float, VkFormat.R16G16B16Sfloat); + Add(Format.R16G16B16Unorm, VkFormat.R16G16B16Unorm); + Add(Format.R16G16B16Snorm, VkFormat.R16G16B16SNorm); + Add(Format.R16G16B16Uint, VkFormat.R16G16B16Uint); + Add(Format.R16G16B16Sint, VkFormat.R16G16B16Sint); + Add(Format.R32G32B32Float, VkFormat.R32G32B32Sfloat); + Add(Format.R32G32B32Uint, VkFormat.R32G32B32Uint); + Add(Format.R32G32B32Sint, VkFormat.R32G32B32Sint); + Add(Format.R8G8B8A8Unorm, VkFormat.R8G8B8A8Unorm); + Add(Format.R8G8B8A8Snorm, VkFormat.R8G8B8A8SNorm); + Add(Format.R8G8B8A8Uint, VkFormat.R8G8B8A8Uint); + Add(Format.R8G8B8A8Sint, VkFormat.R8G8B8A8Sint); + Add(Format.R16G16B16A16Float, VkFormat.R16G16B16A16Sfloat); + Add(Format.R16G16B16A16Unorm, VkFormat.R16G16B16A16Unorm); + Add(Format.R16G16B16A16Snorm, VkFormat.R16G16B16A16SNorm); + Add(Format.R16G16B16A16Uint, VkFormat.R16G16B16A16Uint); + Add(Format.R16G16B16A16Sint, VkFormat.R16G16B16A16Sint); + Add(Format.R32G32B32A32Float, VkFormat.R32G32B32A32Sfloat); + Add(Format.R32G32B32A32Uint, VkFormat.R32G32B32A32Uint); + Add(Format.R32G32B32A32Sint, VkFormat.R32G32B32A32Sint); + Add(Format.S8Uint, VkFormat.S8Uint); + Add(Format.D16Unorm, VkFormat.D16Unorm); + Add(Format.S8UintD24Unorm, VkFormat.D24UnormS8Uint); + Add(Format.D32Float, VkFormat.D32Sfloat); + Add(Format.D24UnormS8Uint, VkFormat.D24UnormS8Uint); + Add(Format.D32FloatS8Uint, VkFormat.D32SfloatS8Uint); + Add(Format.R8G8B8X8Srgb, VkFormat.R8G8B8Srgb); + Add(Format.R8G8B8A8Srgb, VkFormat.R8G8B8A8Srgb); + Add(Format.R4G4Unorm, VkFormat.R4G4UnormPack8); + Add(Format.R4G4B4A4Unorm, VkFormat.R4G4B4A4UnormPack16); + Add(Format.R5G5B5X1Unorm, VkFormat.A1R5G5B5UnormPack16); + Add(Format.R5G5B5A1Unorm, VkFormat.A1R5G5B5UnormPack16); + Add(Format.R5G6B5Unorm, VkFormat.R5G6B5UnormPack16); + Add(Format.R10G10B10A2Unorm, VkFormat.A2B10G10R10UnormPack32); + Add(Format.R10G10B10A2Uint, VkFormat.A2B10G10R10UintPack32); + Add(Format.R11G11B10Float, 
VkFormat.B10G11R11UfloatPack32); + Add(Format.R9G9B9E5Float, VkFormat.E5B9G9R9UfloatPack32); + Add(Format.Bc1RgbaUnorm, VkFormat.BC1RgbaUnormBlock); + Add(Format.Bc2Unorm, VkFormat.BC2UnormBlock); + Add(Format.Bc3Unorm, VkFormat.BC3UnormBlock); + Add(Format.Bc1RgbaSrgb, VkFormat.BC1RgbaSrgbBlock); + Add(Format.Bc2Srgb, VkFormat.BC2SrgbBlock); + Add(Format.Bc3Srgb, VkFormat.BC3SrgbBlock); + Add(Format.Bc4Unorm, VkFormat.BC4UnormBlock); + Add(Format.Bc4Snorm, VkFormat.BC4SNormBlock); + Add(Format.Bc5Unorm, VkFormat.BC5UnormBlock); + Add(Format.Bc5Snorm, VkFormat.BC5SNormBlock); + Add(Format.Bc7Unorm, VkFormat.BC7UnormBlock); + Add(Format.Bc7Srgb, VkFormat.BC7SrgbBlock); + Add(Format.Bc6HSfloat, VkFormat.BC6HSfloatBlock); + Add(Format.Bc6HUfloat, VkFormat.BC6HUfloatBlock); + Add(Format.R8Uscaled, VkFormat.R8Uscaled); + Add(Format.R8Sscaled, VkFormat.R8Sscaled); + Add(Format.R16Uscaled, VkFormat.R16Uscaled); + Add(Format.R16Sscaled, VkFormat.R16Sscaled); + // Add(Format.R32Uscaled, VkFormat.R32Uscaled); + // Add(Format.R32Sscaled, VkFormat.R32Sscaled); + Add(Format.R8G8Uscaled, VkFormat.R8G8Uscaled); + Add(Format.R8G8Sscaled, VkFormat.R8G8Sscaled); + Add(Format.R16G16Uscaled, VkFormat.R16G16Uscaled); + Add(Format.R16G16Sscaled, VkFormat.R16G16Sscaled); + // Add(Format.R32G32Uscaled, VkFormat.R32G32Uscaled); + // Add(Format.R32G32Sscaled, VkFormat.R32G32Sscaled); + Add(Format.R8G8B8Uscaled, VkFormat.R8G8B8Uscaled); + Add(Format.R8G8B8Sscaled, VkFormat.R8G8B8Sscaled); + Add(Format.R16G16B16Uscaled, VkFormat.R16G16B16Uscaled); + Add(Format.R16G16B16Sscaled, VkFormat.R16G16B16Sscaled); + // Add(Format.R32G32B32Uscaled, VkFormat.R32G32B32Uscaled); + // Add(Format.R32G32B32Sscaled, VkFormat.R32G32B32Sscaled); + Add(Format.R8G8B8A8Uscaled, VkFormat.R8G8B8A8Uscaled); + Add(Format.R8G8B8A8Sscaled, VkFormat.R8G8B8A8Sscaled); + Add(Format.R16G16B16A16Uscaled, VkFormat.R16G16B16A16Uscaled); + Add(Format.R16G16B16A16Sscaled, VkFormat.R16G16B16A16Sscaled); + // 
Add(Format.R32G32B32A32Uscaled, VkFormat.R32G32B32A32Uscaled); + // Add(Format.R32G32B32A32Sscaled, VkFormat.R32G32B32A32Sscaled); + Add(Format.R10G10B10A2Snorm, VkFormat.A2B10G10R10SNormPack32); + Add(Format.R10G10B10A2Sint, VkFormat.A2B10G10R10SintPack32); + Add(Format.R10G10B10A2Uscaled, VkFormat.A2B10G10R10UscaledPack32); + Add(Format.R10G10B10A2Sscaled, VkFormat.A2B10G10R10SscaledPack32); + Add(Format.R8G8B8X8Unorm, VkFormat.R8G8B8Unorm); + Add(Format.R8G8B8X8Snorm, VkFormat.R8G8B8SNorm); + Add(Format.R8G8B8X8Uint, VkFormat.R8G8B8Uint); + Add(Format.R8G8B8X8Sint, VkFormat.R8G8B8Sint); + Add(Format.R16G16B16X16Float, VkFormat.R16G16B16Sfloat); + Add(Format.R16G16B16X16Unorm, VkFormat.R16G16B16Unorm); + Add(Format.R16G16B16X16Snorm, VkFormat.R16G16B16SNorm); + Add(Format.R16G16B16X16Uint, VkFormat.R16G16B16Uint); + Add(Format.R16G16B16X16Sint, VkFormat.R16G16B16Sint); + Add(Format.R32G32B32X32Float, VkFormat.R32G32B32Sfloat); + Add(Format.R32G32B32X32Uint, VkFormat.R32G32B32Uint); + Add(Format.R32G32B32X32Sint, VkFormat.R32G32B32Sint); + Add(Format.Astc4x4Unorm, VkFormat.Astc4x4UnormBlock); + Add(Format.Astc5x4Unorm, VkFormat.Astc5x4UnormBlock); + Add(Format.Astc5x5Unorm, VkFormat.Astc5x5UnormBlock); + Add(Format.Astc6x5Unorm, VkFormat.Astc6x5UnormBlock); + Add(Format.Astc6x6Unorm, VkFormat.Astc6x6UnormBlock); + Add(Format.Astc8x5Unorm, VkFormat.Astc8x5UnormBlock); + Add(Format.Astc8x6Unorm, VkFormat.Astc8x6UnormBlock); + Add(Format.Astc8x8Unorm, VkFormat.Astc8x8UnormBlock); + Add(Format.Astc10x5Unorm, VkFormat.Astc10x5UnormBlock); + Add(Format.Astc10x6Unorm, VkFormat.Astc10x6UnormBlock); + Add(Format.Astc10x8Unorm, VkFormat.Astc10x8UnormBlock); + Add(Format.Astc10x10Unorm, VkFormat.Astc10x10UnormBlock); + Add(Format.Astc12x10Unorm, VkFormat.Astc12x10UnormBlock); + Add(Format.Astc12x12Unorm, VkFormat.Astc12x12UnormBlock); + Add(Format.Astc4x4Srgb, VkFormat.Astc4x4SrgbBlock); + Add(Format.Astc5x4Srgb, VkFormat.Astc5x4SrgbBlock); + Add(Format.Astc5x5Srgb, 
VkFormat.Astc5x5SrgbBlock); + Add(Format.Astc6x5Srgb, VkFormat.Astc6x5SrgbBlock); + Add(Format.Astc6x6Srgb, VkFormat.Astc6x6SrgbBlock); + Add(Format.Astc8x5Srgb, VkFormat.Astc8x5SrgbBlock); + Add(Format.Astc8x6Srgb, VkFormat.Astc8x6SrgbBlock); + Add(Format.Astc8x8Srgb, VkFormat.Astc8x8SrgbBlock); + Add(Format.Astc10x5Srgb, VkFormat.Astc10x5SrgbBlock); + Add(Format.Astc10x6Srgb, VkFormat.Astc10x6SrgbBlock); + Add(Format.Astc10x8Srgb, VkFormat.Astc10x8SrgbBlock); + Add(Format.Astc10x10Srgb, VkFormat.Astc10x10SrgbBlock); + Add(Format.Astc12x10Srgb, VkFormat.Astc12x10SrgbBlock); + Add(Format.Astc12x12Srgb, VkFormat.Astc12x12SrgbBlock); + Add(Format.B5G6R5Unorm, VkFormat.R5G6B5UnormPack16); + Add(Format.B5G5R5X1Unorm, VkFormat.A1R5G5B5UnormPack16); + Add(Format.B5G5R5A1Unorm, VkFormat.A1R5G5B5UnormPack16); + Add(Format.A1B5G5R5Unorm, VkFormat.R5G5B5A1UnormPack16); + Add(Format.B8G8R8X8Unorm, VkFormat.B8G8R8Unorm); + Add(Format.B8G8R8A8Unorm, VkFormat.B8G8R8A8Unorm); + Add(Format.B8G8R8X8Srgb, VkFormat.B8G8R8Srgb); + Add(Format.B8G8R8A8Srgb, VkFormat.B8G8R8A8Srgb); + } + + private static void Add(Format format, VkFormat vkFormat) + { + Table[(int)format] = vkFormat; + } + + public static VkFormat GetFormat(Format format) + { + return Table[(int)format]; + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/FramebufferParams.cs b/Ryujinx.Graphics.Vulkan/FramebufferParams.cs new file mode 100644 index 000000000..3844abcd5 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/FramebufferParams.cs @@ -0,0 +1,203 @@ +using Ryujinx.Graphics.GAL; +using Silk.NET.Vulkan; +using System; +using System.Linq; +using VkFormat = Silk.NET.Vulkan.Format; + +namespace Ryujinx.Graphics.Vulkan +{ + class FramebufferParams + { + private readonly Device _device; + private readonly Auto[] _attachments; + private readonly TextureView[] _colors; + private readonly TextureView _depthStencil; + private uint _validColorAttachments; + + public uint Width { get; } + public uint Height { get; } + public uint Layers 
{ get; } + + public uint[] AttachmentSamples { get; } + public VkFormat[] AttachmentFormats { get; } + public int[] AttachmentIndices { get; } + + public int AttachmentsCount { get; } + public int MaxColorAttachmentIndex { get; } + public bool HasDepthStencil { get; } + public int ColorAttachmentsCount => AttachmentsCount - (HasDepthStencil ? 1 : 0); + + public FramebufferParams( + Device device, + Auto view, + uint width, + uint height, + bool isDepthStencil, + VkFormat format) + { + _device = device; + _attachments = new[] { view }; + _validColorAttachments = 1u; + + Width = width; + Height = height; + Layers = 1; + + AttachmentSamples = new[] { 1u }; + AttachmentFormats = new[] { format }; + AttachmentIndices = new[] { 0 }; + + AttachmentsCount = 1; + + HasDepthStencil = isDepthStencil; + } + + public FramebufferParams(Device device, ITexture[] colors, ITexture depthStencil) + { + _device = device; + + int colorsCount = colors.Count(IsValidTextureView); + + int count = colorsCount + (IsValidTextureView(depthStencil) ? 
1 : 0); + + _attachments = new Auto[count]; + _colors = new TextureView[colorsCount]; + + AttachmentSamples = new uint[count]; + AttachmentFormats = new VkFormat[count]; + AttachmentIndices = new int[count]; + MaxColorAttachmentIndex = colors.Length - 1; + + uint width = uint.MaxValue; + uint height = uint.MaxValue; + uint layers = uint.MaxValue; + + int index = 0; + int bindIndex = 0; + + foreach (ITexture color in colors) + { + if (IsValidTextureView(color)) + { + var texture = (TextureView)color; + + _attachments[index] = texture.GetImageViewForAttachment(); + _colors[index] = texture; + _validColorAttachments |= 1u << bindIndex; + + AttachmentSamples[index] = (uint)texture.Info.Samples; + AttachmentFormats[index] = texture.VkFormat; + AttachmentIndices[index] = bindIndex; + + width = Math.Min(width, (uint)texture.Width); + height = Math.Min(height, (uint)texture.Height); + layers = Math.Min(layers, (uint)texture.Layers); + + if (++index >= colorsCount) + { + break; + } + } + + bindIndex++; + } + + if (depthStencil is TextureView dsTexture && dsTexture.Valid) + { + _attachments[count - 1] = dsTexture.GetImageViewForAttachment(); + _depthStencil = dsTexture; + + AttachmentSamples[count - 1] = (uint)dsTexture.Info.Samples; + AttachmentFormats[count - 1] = dsTexture.VkFormat; + + width = Math.Min(width, (uint)dsTexture.Width); + height = Math.Min(height, (uint)dsTexture.Height); + layers = Math.Min(layers, (uint)dsTexture.Layers); + + HasDepthStencil = true; + } + + if (count == 0) + { + width = height = layers = 1; + } + + Width = width; + Height = height; + Layers = layers; + + AttachmentsCount = count; + } + + public Auto GetAttachment(int index) + { + if ((uint)index >= _attachments.Length) + { + return null; + } + + return _attachments[index]; + } + + public bool IsVaidColorAttachment(int bindIndex) + { + return (uint)bindIndex < Constants.MaxRenderTargets && (_validColorAttachments & (1u << bindIndex)) != 0; + } + + private static bool 
IsValidTextureView(ITexture texture) + { + return texture is TextureView view && view.Valid; + } + + public ClearRect GetClearRect(Rectangle scissor, int layer) + { + int x = scissor.X; + int y = scissor.Y; + int width = Math.Min((int)Width - scissor.X, scissor.Width); + int height = Math.Min((int)Height - scissor.Y, scissor.Height); + + return new ClearRect(new Rect2D(new Offset2D(x, y), new Extent2D((uint)width, (uint)height)), (uint)layer, 1); + } + + public unsafe Auto Create(Vk api, CommandBufferScoped cbs, Auto renderPass) + { + ImageView* attachments = stackalloc ImageView[_attachments.Length]; + + for (int i = 0; i < _attachments.Length; i++) + { + attachments[i] = _attachments[i].Get(cbs).Value; + } + + var framebufferCreateInfo = new FramebufferCreateInfo() + { + SType = StructureType.FramebufferCreateInfo, + RenderPass = renderPass.Get(cbs).Value, + AttachmentCount = (uint)_attachments.Length, + PAttachments = attachments, + Width = Width, + Height = Height, + Layers = Layers + }; + + api.CreateFramebuffer(_device, framebufferCreateInfo, null, out var framebuffer).ThrowOnError(); + return new Auto(new DisposableFramebuffer(api, _device, framebuffer), null, _attachments); + } + + public void UpdateModifications() + { + if (_colors != null) + { + for (int index = 0; index < _colors.Length; index++) + { + _colors[index].Storage.SetModification( + AccessFlags.AccessColorAttachmentWriteBit, + PipelineStageFlags.PipelineStageColorAttachmentOutputBit); + } + } + + _depthStencil?.Storage.SetModification( + AccessFlags.AccessDepthStencilAttachmentWriteBit, + PipelineStageFlags.PipelineStageColorAttachmentOutputBit); + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs b/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs new file mode 100644 index 000000000..499bd3b24 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs @@ -0,0 +1,45 @@ +using Silk.NET.Vulkan; + +namespace Ryujinx.Graphics.Vulkan +{ + struct HardwareCapabilities + { 
+        public readonly bool SupportsConditionalRendering;
+        public readonly bool SupportsExtendedDynamicState;
+        public readonly bool SupportsMultiView;
+        public readonly bool SupportsNullDescriptors;
+        public readonly bool SupportsPushDescriptors;
+        public readonly bool SupportsTransformFeedback;
+        public readonly bool SupportsTransformFeedbackQueries;
+        public readonly bool SupportsGeometryShader;
+        public readonly uint MinSubgroupSize;
+        public readonly uint MaxSubgroupSize;
+        public readonly ShaderStageFlags RequiredSubgroupSizeStages;
+
+        public HardwareCapabilities(
+            bool supportsConditionalRendering,
+            bool supportsExtendedDynamicState,
+            bool supportsMultiView,
+            bool supportsNullDescriptors,
+            bool supportsPushDescriptors,
+            bool supportsTransformFeedback,
+            bool supportsTransformFeedbackQueries,
+            bool supportsGeometryShader,
+            uint minSubgroupSize,
+            uint maxSubgroupSize,
+            ShaderStageFlags requiredSubgroupSizeStages)
+        {
+            SupportsConditionalRendering = supportsConditionalRendering;
+            SupportsExtendedDynamicState = supportsExtendedDynamicState;
+            SupportsMultiView = supportsMultiView;
+            SupportsNullDescriptors = supportsNullDescriptors;
+            SupportsPushDescriptors = supportsPushDescriptors;
+            SupportsTransformFeedback = supportsTransformFeedback;
+            SupportsTransformFeedbackQueries = supportsTransformFeedbackQueries;
+            SupportsGeometryShader = supportsGeometryShader;
+            MinSubgroupSize = minSubgroupSize;
+            MaxSubgroupSize = maxSubgroupSize;
+            RequiredSubgroupSizeStages = requiredSubgroupSizeStages;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/HashTableSlim.cs b/Ryujinx.Graphics.Vulkan/HashTableSlim.cs
new file mode 100644
index 000000000..5f3017301
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/HashTableSlim.cs
@@ -0,0 +1,131 @@
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    /// <summary>
+    /// Equality comparison that takes the other operand by reference.
+    /// </summary>
+    interface IRefEquatable<T>
+    {
+        bool Equals(ref T other);
+    }
+
+    /// <summary>
+    /// Small hash table with a fixed number of buckets. Each bucket is an array that grows by
+    /// one slot per insertion and is searched linearly.
+    /// </summary>
+    class HashTableSlim<K, V> where K : IRefEquatable<K>
+    {
+        private const int TotalBuckets = 16; // Must be power of 2
+        private const int TotalBucketsMask = TotalBuckets - 1;
+
+        private struct Entry
+        {
+            public K Key;
+            public V Value;
+        }
+
+        private readonly Entry[][] _hashTable = new Entry[TotalBuckets][];
+
+        /// <summary>
+        /// All stored keys, in bucket order.
+        /// </summary>
+        public IEnumerable<K> Keys
+        {
+            get
+            {
+                foreach (Entry[] bucket in _hashTable)
+                {
+                    if (bucket != null)
+                    {
+                        foreach (Entry entry in bucket)
+                        {
+                            yield return entry.Key;
+                        }
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// All stored values, in bucket order.
+        /// </summary>
+        public IEnumerable<V> Values
+        {
+            get
+            {
+                foreach (Entry[] bucket in _hashTable)
+                {
+                    if (bucket != null)
+                    {
+                        foreach (Entry entry in bucket)
+                        {
+                            yield return entry.Value;
+                        }
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Appends an entry to the bucket selected by the key's hash code. No duplicate check is
+        /// made: adding a key that is already present stores a second entry.
+        /// </summary>
+        public void Add(ref K key, V value)
+        {
+            var entry = new Entry()
+            {
+                Key = key,
+                Value = value
+            };
+
+            int hashCode = key.GetHashCode();
+            int bucketIndex = hashCode & TotalBucketsMask;
+
+            var bucket = _hashTable[bucketIndex];
+            if (bucket != null)
+            {
+                int index = bucket.Length;
+
+                Array.Resize(ref _hashTable[bucketIndex], index + 1);
+
+                _hashTable[bucketIndex][index] = entry;
+            }
+            else
+            {
+                _hashTable[bucketIndex] = new Entry[]
+                {
+                    entry
+                };
+            }
+        }
+
+        /// <summary>
+        /// Looks up the first entry whose key equals <paramref name="key"/>. Returns false and
+        /// sets <paramref name="value"/> to its default when there is none.
+        /// </summary>
+        public bool TryGetValue(ref K key, out V value)
+        {
+            int hashCode = key.GetHashCode();
+
+            var bucket = _hashTable[hashCode & TotalBucketsMask];
+            if (bucket != null)
+            {
+                for (int i = 0; i < bucket.Length; i++)
+                {
+                    ref var entry = ref bucket[i];
+
+                    if (entry.Key.Equals(ref key))
+                    {
+                        value = entry.Value;
+                        return true;
+                    }
+                }
+            }
+
+            value = default;
+            return false;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/HelperShader.cs b/Ryujinx.Graphics.Vulkan/HelperShader.cs
new file mode 100644
index 000000000..cb5e0037b
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/HelperShader.cs
@@ -0,0 +1,377 @@
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Shader;
+using Ryujinx.Graphics.Shader.Translation;
+using Ryujinx.Graphics.Vulkan.Shaders; +using Silk.NET.Vulkan; +using System; +using VkFormat = Silk.NET.Vulkan.Format; + +namespace Ryujinx.Graphics.Vulkan +{ + class HelperShader : IDisposable + { + private readonly PipelineHelperShader _pipeline; + private readonly ISampler _samplerLinear; + private readonly ISampler _samplerNearest; + private readonly IProgram _programColorBlit; + private readonly IProgram _programColorBlitClearAlpha; + private readonly IProgram _programColorClear; + + public HelperShader(VulkanGraphicsDevice gd, Device device) + { + _pipeline = new PipelineHelperShader(gd, device); + + static GAL.SamplerCreateInfo GetSamplerCreateInfo(MinFilter minFilter, MagFilter magFilter) + { + return new GAL.SamplerCreateInfo( + minFilter, + magFilter, + false, + AddressMode.ClampToEdge, + AddressMode.ClampToEdge, + AddressMode.ClampToEdge, + CompareMode.None, + GAL.CompareOp.Always, + new ColorF(0f, 0f, 0f, 0f), + 0f, + 0f, + 0f, + 1f); + } + + _samplerLinear = gd.CreateSampler(GetSamplerCreateInfo(MinFilter.Linear, MagFilter.Linear)); + _samplerNearest = gd.CreateSampler(GetSamplerCreateInfo(MinFilter.Nearest, MagFilter.Nearest)); + + var vertexBindings = new ShaderBindings( + new[] { 1 }, + Array.Empty(), + Array.Empty(), + Array.Empty()); + + var fragmentBindings = new ShaderBindings( + Array.Empty(), + Array.Empty(), + new[] { 0 }, + Array.Empty()); + + _programColorBlit = gd.CreateProgramWithMinimalLayout(new[] + { + new ShaderSource(ShaderBinaries.ColorBlitVertexShaderSource, vertexBindings, ShaderStage.Vertex, TargetLanguage.Glsl), + new ShaderSource(ShaderBinaries.ColorBlitFragmentShaderSource, fragmentBindings, ShaderStage.Fragment, TargetLanguage.Glsl), + }); + + _programColorBlitClearAlpha = gd.CreateProgramWithMinimalLayout(new[] + { + new ShaderSource(ShaderBinaries.ColorBlitVertexShaderSource, vertexBindings, ShaderStage.Vertex, TargetLanguage.Glsl), + new ShaderSource(ShaderBinaries.ColorBlitClearAlphaFragmentShaderSource, 
fragmentBindings, ShaderStage.Fragment, TargetLanguage.Glsl), + }); + + var fragmentBindings2 = new ShaderBindings( + Array.Empty(), + Array.Empty(), + Array.Empty(), + Array.Empty()); + + _programColorClear = gd.CreateProgramWithMinimalLayout(new[] + { + new ShaderSource(ShaderBinaries.ColorClearVertexShaderSource, vertexBindings, ShaderStage.Vertex, TargetLanguage.Glsl), + new ShaderSource(ShaderBinaries.ColorClearFragmentShaderSource, fragmentBindings2, ShaderStage.Fragment, TargetLanguage.Glsl), + }); + } + + public void Blit( + VulkanGraphicsDevice gd, + TextureView src, + Auto dst, + int dstWidth, + int dstHeight, + VkFormat dstFormat, + Extents2D srcRegion, + Extents2D dstRegion, + bool linearFilter, + bool clearAlpha = false) + { + gd.FlushAllCommands(); + + using var cbs = gd.CommandBufferPool.Rent(); + + Blit(gd, cbs, src, dst, dstWidth, dstHeight, dstFormat, srcRegion, dstRegion, linearFilter, clearAlpha); + } + + public void Blit( + VulkanGraphicsDevice gd, + CommandBufferScoped cbs, + TextureView src, + Auto dst, + int dstWidth, + int dstHeight, + VkFormat dstFormat, + Extents2D srcRegion, + Extents2D dstRegion, + bool linearFilter, + bool clearAlpha = false) + { + _pipeline.SetCommandBuffer(cbs); + + const int RegionBufferSize = 16; + + var sampler = linearFilter ? 
_samplerLinear : _samplerNearest; + + _pipeline.SetTextureAndSampler(ShaderStage.Fragment, 0, src, sampler); + + Span region = stackalloc float[RegionBufferSize / sizeof(float)]; + + region[0] = (float)srcRegion.X1 / src.Width; + region[1] = (float)srcRegion.X2 / src.Width; + region[2] = (float)srcRegion.Y1 / src.Height; + region[3] = (float)srcRegion.Y2 / src.Height; + + if (dstRegion.X1 > dstRegion.X2) + { + float temp = region[0]; + region[0] = region[1]; + region[1] = temp; + } + + if (dstRegion.Y1 > dstRegion.Y2) + { + float temp = region[2]; + region[2] = region[3]; + region[3] = temp; + } + + var bufferHandle = gd.BufferManager.CreateWithHandle(gd, RegionBufferSize, false); + + gd.BufferManager.SetData(bufferHandle, 0, region); + + Span bufferRanges = stackalloc BufferRange[1]; + + bufferRanges[0] = new BufferRange(bufferHandle, 0, RegionBufferSize); + + _pipeline.SetUniformBuffers(1, bufferRanges); + + Span viewports = stackalloc GAL.Viewport[1]; + + var rect = new Rectangle( + MathF.Min(dstRegion.X1, dstRegion.X2), + MathF.Min(dstRegion.Y1, dstRegion.Y2), + MathF.Abs(dstRegion.X2 - dstRegion.X1), + MathF.Abs(dstRegion.Y2 - dstRegion.Y1)); + + viewports[0] = new GAL.Viewport( + rect, + ViewportSwizzle.PositiveX, + ViewportSwizzle.PositiveY, + ViewportSwizzle.PositiveZ, + ViewportSwizzle.PositiveW, + 0f, + 1f); + + Span> scissors = stackalloc Rectangle[1]; + + scissors[0] = new Rectangle(0, 0, dstWidth, dstHeight); + + _pipeline.SetProgram(clearAlpha ? 
_programColorBlitClearAlpha : _programColorBlit); + _pipeline.SetRenderTarget(dst, (uint)dstWidth, (uint)dstHeight, false, dstFormat); + _pipeline.SetRenderTargetColorMasks(new uint[] { 0xf }); + _pipeline.SetScissors(scissors); + + if (clearAlpha) + { + _pipeline.ClearRenderTargetColor(0, 0, new ColorF(0f, 0f, 0f, 1f)); + } + + _pipeline.SetViewports(0, viewports, false); + _pipeline.SetPrimitiveTopology(GAL.PrimitiveTopology.TriangleStrip); + _pipeline.Draw(4, 1, 0, 0); + _pipeline.Finish(); + + gd.BufferManager.Delete(bufferHandle); + } + + public void Clear( + VulkanGraphicsDevice gd, + Auto dst, + ReadOnlySpan clearColor, + uint componentMask, + int dstWidth, + int dstHeight, + VkFormat dstFormat, + Rectangle scissor) + { + gd.FlushAllCommands(); + + using var cbs = gd.CommandBufferPool.Rent(); + + _pipeline.SetCommandBuffer(cbs); + + const int ClearColorBufferSize = 16; + + var bufferHandle = gd.BufferManager.CreateWithHandle(gd, ClearColorBufferSize, false); + + gd.BufferManager.SetData(bufferHandle, 0, clearColor); + + Span bufferRanges = stackalloc BufferRange[1]; + + bufferRanges[0] = new BufferRange(bufferHandle, 0, ClearColorBufferSize); + + _pipeline.SetUniformBuffers(1, bufferRanges); + + Span viewports = stackalloc GAL.Viewport[1]; + + viewports[0] = new GAL.Viewport( + new Rectangle(0, 0, dstWidth, dstHeight), + ViewportSwizzle.PositiveX, + ViewportSwizzle.PositiveY, + ViewportSwizzle.PositiveZ, + ViewportSwizzle.PositiveW, + 0f, + 1f); + + Span> scissors = stackalloc Rectangle[1]; + + scissors[0] = scissor; + + _pipeline.SetProgram(_programColorClear); + _pipeline.SetRenderTarget(dst, (uint)dstWidth, (uint)dstHeight, false, dstFormat); + _pipeline.SetRenderTargetColorMasks(new uint[] { componentMask }); + _pipeline.SetViewports(0, viewports, false); + _pipeline.SetScissors(scissors); + _pipeline.SetPrimitiveTopology(GAL.PrimitiveTopology.TriangleStrip); + _pipeline.Draw(4, 1, 0, 0); + _pipeline.Finish(); + + gd.BufferManager.Delete(bufferHandle); + 
/// <summary>
/// Draws a region of a texture into a region of the currently bound render target,
/// using the colour blit program on the supplied pipeline.
/// </summary>
/// <param name="gd">Graphics device that owns the buffer manager</param>
/// <param name="pipeline">Pipeline the draw is recorded on</param>
/// <param name="src">Texture that is sampled</param>
/// <param name="srcSampler">Sampler used to read <paramref name="src"/></param>
/// <param name="srcRegion">Source region, in texels of <paramref name="src"/></param>
/// <param name="dstRegion">Destination region; inverted X or Y coordinates flip the image on that axis</param>
public void DrawTexture(
    VulkanGraphicsDevice gd,
    PipelineBase pipeline,
    TextureView src,
    ISampler srcSampler,
    Extents2DF srcRegion,
    Extents2DF dstRegion)
{
    const int RegionBufferSize = 16;

    pipeline.SetTextureAndSampler(ShaderStage.Fragment, 0, src, srcSampler);

    Span<float> region = stackalloc float[RegionBufferSize / sizeof(float)];

    // Normalized source coordinates, laid out as X1, X2, Y1, Y2.
    region[0] = srcRegion.X1 / src.Width;
    region[1] = srcRegion.X2 / src.Width;
    region[2] = srcRegion.Y1 / src.Height;
    region[3] = srcRegion.Y2 / src.Height;

    // The viewport below is always built from min/abs values, so a flipped destination
    // is expressed by swapping the source coordinates instead.
    if (dstRegion.X1 > dstRegion.X2)
    {
        (region[0], region[1]) = (region[1], region[0]);
    }

    if (dstRegion.Y1 > dstRegion.Y2)
    {
        (region[2], region[3]) = (region[3], region[2]);
    }

    var bufferHandle = gd.BufferManager.CreateWithHandle(gd, RegionBufferSize, false);

    gd.BufferManager.SetData<float>(bufferHandle, 0, region);

    Span<BufferRange> bufferRanges = stackalloc BufferRange[1];

    bufferRanges[0] = new BufferRange(bufferHandle, 0, RegionBufferSize);

    pipeline.SetUniformBuffers(1, bufferRanges);

    Span<GAL.Viewport> viewports = stackalloc GAL.Viewport[1];

    var rect = new Rectangle<float>(
        MathF.Min(dstRegion.X1, dstRegion.X2),
        MathF.Min(dstRegion.Y1, dstRegion.Y2),
        MathF.Abs(dstRegion.X2 - dstRegion.X1),
        MathF.Abs(dstRegion.Y2 - dstRegion.Y1));

    viewports[0] = new GAL.Viewport(
        rect,
        ViewportSwizzle.PositiveX,
        ViewportSwizzle.PositiveY,
        ViewportSwizzle.PositiveZ,
        ViewportSwizzle.PositiveW,
        0f,
        1f);

    // The previously declared scissor span was never passed to the pipeline and has been removed.
    pipeline.SetProgram(_programColorBlit);
    pipeline.SetViewports(0, viewports, false);
    pipeline.SetPrimitiveTopology(GAL.PrimitiveTopology.TriangleStrip);
    pipeline.Draw(4, 1, 0, 0);

    gd.BufferManager.Delete(bufferHandle);
}
/// <summary>
/// Releases the shader programs, samplers and helper pipeline owned by this instance.
/// </summary>
/// <param name="disposing">True when called from <see cref="Dispose()"/>; nothing is released otherwise</param>
protected virtual void Dispose(bool disposing)
{
    if (disposing)
    {
        _programColorBlitClearAlpha.Dispose();
        _programColorBlit.Dispose();
        // The clear program is used by Clear() and was previously never released.
        _programColorClear.Dispose();
        _samplerNearest.Dispose();
        _samplerLinear.Dispose();
        _pipeline.Dispose();
    }
}
/// <summary>
/// Disposes the view, memory allocation and image of every slot, then creates a fresh set
/// through <see cref="CreateImages"/> using the current width, height and format.
/// </summary>
private void RecreateImages()
{
    int slot = 0;

    while (slot < ImageCount)
    {
        // Same release order as before: view, then backing memory, then the image itself.
        _imageViews[slot]?.Dispose();
        _imageAllocationAuto[slot]?.Dispose();
        _images[slot]?.Dispose();

        slot++;
    }

    CreateImages();
}
/// <summary>
/// Blits the guest texture into the next presentation image and hands that image
/// to the host through <paramref name="swapBuffersCallback"/>.
/// </summary>
/// <param name="texture">Texture to present; must be a <see cref="TextureView"/></param>
/// <param name="crop">Source crop, flip and aspect-ratio settings</param>
/// <param name="swapBuffersCallback">Callback that receives a <see cref="PresentImageInfo"/> describing the presented image</param>
public override unsafe void Present(ITexture texture, ImageCrop crop, Action<object> swapBuffersCallback)
{
    if (_recreateImages)
    {
        RecreateImages();
        _recreateImages = false;
    }

    var image = _images[_nextImage];

    _gd.FlushAllCommands();

    var cbs = _gd.CommandBufferPool.Rent();

    Transition(
        cbs.CommandBuffer,
        image.GetUnsafe().Value,
        0,
        AccessFlags.AccessTransferWriteBit,
        ImageLayout.ColorAttachmentOptimal,
        ImageLayout.General);

    var view = (TextureView)texture;

    int srcX0, srcX1, srcY0, srcY1;
    float scale = view.ScaleFactor;

    // A crop with both edges at zero on an axis means "use the whole texture" on that axis.
    if (crop.Left == 0 && crop.Right == 0)
    {
        srcX0 = 0;
        srcX1 = (int)(view.Width / scale);
    }
    else
    {
        srcX0 = crop.Left;
        srcX1 = crop.Right;
    }

    if (crop.Top == 0 && crop.Bottom == 0)
    {
        srcY0 = 0;
        srcY1 = (int)(view.Height / scale);
    }
    else
    {
        srcY0 = crop.Top;
        srcY1 = crop.Bottom;
    }

    if (scale != 1f)
    {
        srcX0 = (int)(srcX0 * scale);
        srcY0 = (int)(srcY0 * scale);
        srcX1 = (int)Math.Ceiling(srcX1 * scale);
        srcY1 = (int)Math.Ceiling(srcY1 * scale);
    }

    if (ScreenCaptureRequested)
    {
        CaptureFrame(view, srcX0, srcY0, srcX1 - srcX0, srcY1 - srcY0, view.Info.Format.IsBgr(), crop.FlipX, crop.FlipY);

        ScreenCaptureRequested = false;
    }

    // Unless stretched, shrink one axis so the requested aspect ratio is kept inside the window.
    float ratioX = crop.IsStretched ? 1.0f : MathF.Min(1.0f, _height * crop.AspectRatioX / (_width * crop.AspectRatioY));
    float ratioY = crop.IsStretched ? 1.0f : MathF.Min(1.0f, _width * crop.AspectRatioY / (_height * crop.AspectRatioX));

    int dstWidth = (int)(_width * ratioX);
    int dstHeight = (int)(_height * ratioY);

    int dstPaddingX = (_width - dstWidth) / 2;
    int dstPaddingY = (_height - dstHeight) / 2;

    int dstX0 = crop.FlipX ? _width - dstPaddingX : dstPaddingX;
    int dstX1 = crop.FlipX ? dstPaddingX : _width - dstPaddingX;

    int dstY0 = crop.FlipY ? dstPaddingY : _height - dstPaddingY;
    int dstY1 = crop.FlipY ? _height - dstPaddingY : dstPaddingY;

    _gd.HelperShader.Blit(
        _gd,
        cbs,
        view,
        _imageViews[_nextImage],
        _width,
        _height,
        _format,
        new Extents2D(srcX0, srcY0, srcX1, srcY1),
        new Extents2D(dstX0, dstY1, dstX1, dstY0),
        true,
        true);

    Transition(
        cbs.CommandBuffer,
        image.GetUnsafe().Value,
        0,
        0,
        ImageLayout.General,
        ImageLayout.ColorAttachmentOptimal);

    _gd.CommandBufferPool.Return(
        cbs,
        null,
        new[] { PipelineStageFlags.PipelineStageColorAttachmentOutputBit },
        null);

    var memory = _imageAllocationAuto[_nextImage].GetUnsafe().Memory;
    var presentInfo = new PresentImageInfo(image.GetUnsafe().Value, memory, _imageSizes[_nextImage], _imageOffsets[_nextImage], _renderFinishedSemaphore, _imageAvailableSemaphore);

    swapBuffersCallback(presentInfo);

    // Advance to the next image. The previous "_nextImage %= ImageCount" never changed the
    // index, so only the first of the ImageCount images was ever used.
    _nextImage = (_nextImage + 1) % ImageCount;
}
/// <summary>
/// A sub-range of a device memory block handed out by a <see cref="MemoryAllocatorBlockList"/>.
/// Disposing the allocation returns the range to the block list that created it.
/// </summary>
struct MemoryAllocation : IDisposable
{
    private readonly MemoryAllocatorBlockList _owner;
    private readonly MemoryAllocatorBlockList.Block _block;

    /// <summary>Device memory object that contains the allocation.</summary>
    public DeviceMemory Memory { get; }

    /// <summary>Host pointer value supplied at construction.</summary>
    public IntPtr HostPointer { get; }

    /// <summary>Offset of the allocation inside <see cref="Memory"/>.</summary>
    public ulong Offset { get; }

    /// <summary>Size of the allocation in bytes.</summary>
    public ulong Size { get; }

    /// <summary>
    /// Creates a new allocation descriptor.
    /// </summary>
    /// <param name="owner">Block list the range is returned to on dispose</param>
    /// <param name="block">Block the range was taken from</param>
    /// <param name="memory">Device memory object of the block</param>
    /// <param name="hostPointer">Host pointer for the allocation</param>
    /// <param name="offset">Offset of the range inside the block</param>
    /// <param name="size">Size of the range in bytes</param>
    public MemoryAllocation(
        MemoryAllocatorBlockList owner,
        MemoryAllocatorBlockList.Block block,
        DeviceMemory memory,
        IntPtr hostPointer,
        ulong offset,
        ulong size)
    {
        _owner = owner;
        _block = block;
        Memory = memory;
        HostPointer = hostPointer;
        Offset = offset;
        Size = size;
    }

    /// <summary>
    /// Returns the range to its owner. A default-initialized allocation has no owner and is ignored.
    /// </summary>
    public void Dispose()
    {
        // MemoryAllocator.AllocateDeviceMemory returns "default" when no memory type matches;
        // such a value has a null owner and previously threw NullReferenceException here.
        _owner?.Free(_block, Offset, Size);
    }
}
/// <summary>
/// Finds the first memory type of the physical device that is allowed by
/// <paramref name="memoryTypeBits"/> and has every property in <paramref name="flags"/>.
/// </summary>
/// <param name="api">Vulkan API used to query the memory properties</param>
/// <param name="physicalDevice">Physical device that is queried</param>
/// <param name="memoryTypeBits">Bit mask with one bit per acceptable memory type index</param>
/// <param name="flags">Property flags the memory type must have</param>
/// <returns>Index of the first matching memory type, or -1 if there is none</returns>
private static int FindSuitableMemoryTypeIndex(Vk api, PhysicalDevice physicalDevice, uint memoryTypeBits, MemoryPropertyFlags flags)
{
    api.GetPhysicalDeviceMemoryProperties(physicalDevice, out var memoryProperties);

    int typeIndex = 0;

    while (typeIndex < memoryProperties.MemoryTypeCount)
    {
        bool allowedByMask = (memoryTypeBits & (1 << typeIndex)) != 0;

        if (allowedByMask)
        {
            var candidate = memoryProperties.MemoryTypes[typeIndex];

            if (candidate.PropertyFlags.HasFlag(flags))
            {
                return typeIndex;
            }
        }

        typeIndex++;
    }

    return -1;
}
/// <summary>
/// Adds a range back to the offset-sorted free list, merging it with the free range that
/// starts exactly at its end and/or the free range that ends exactly at its start.
/// </summary>
/// <param name="offset">Start offset of the range being freed</param>
/// <param name="size">Size of the range being freed</param>
private void InsertFreeRangeComingled(ulong offset, ulong size)
{
    int slot = _freeRanges.BinarySearch(new Range(offset, size));

    if (slot < 0)
    {
        // Not found: the complement is the index where the range would be inserted.
        slot = ~slot;
    }

    ulong mergedStart = offset;
    ulong mergedEnd = offset + size;

    if (slot < _freeRanges.Count)
    {
        Range following = _freeRanges[slot];

        if (following.Offset == mergedEnd)
        {
            // The next free range starts right where this one ends: absorb it.
            mergedEnd = following.Offset + following.Size;
            _freeRanges.RemoveAt(slot);
        }
    }

    if (slot > 0)
    {
        Range preceding = _freeRanges[slot - 1];

        if (preceding.Offset + preceding.Size == offset)
        {
            // The previous free range ends right where this one starts: absorb it.
            mergedStart = preceding.Offset;
            slot--;
            _freeRanges.RemoveAt(slot);
        }
    }

    _freeRanges.Insert(slot, new Range(mergedStart, mergedEnd - mergedStart));
}
/// <summary>
/// Allocates a range of device memory, reusing an existing block when one with the same
/// mapping state has room, and creating a new block otherwise.
/// </summary>
/// <param name="size">Size of the allocation in bytes</param>
/// <param name="alignment">Required alignment; must be positive and fit in a signed 32-bit integer</param>
/// <param name="map">True if the memory must be host mapped</param>
/// <returns>The new allocation</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="alignment"/> is zero or does not fit in an int</exception>
public unsafe MemoryAllocation Allocate(ulong size, ulong alignment, bool map)
{
    // Ensure we have a sane alignment value.
    if ((ulong)(int)alignment != alignment || (int)alignment <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(alignment), $"Invalid alignment 0x{alignment:X}.");
    }

    for (int i = 0; i < _blocks.Count; i++)
    {
        var block = _blocks[i];

        if (block.Maped == map && block.Size >= size)
        {
            ulong offset = block.Allocate(size, alignment);
            if (offset != InvalidOffset)
            {
                return new MemoryAllocation(this, block, block.Memory, GetHostPointer(block, offset), offset, size);
            }
        }
    }

    // No existing block can hold the request: create a new one, rounded up to the block alignment.
    ulong blockAlignedSize = BitUtils.AlignUp(size, _blockAlignment);

    var memoryAllocateInfo = new MemoryAllocateInfo()
    {
        SType = StructureType.MemoryAllocateInfo,
        AllocationSize = blockAlignedSize,
        MemoryTypeIndex = (uint)MemoryTypeIndex
    };

    _api.AllocateMemory(_device, memoryAllocateInfo, null, out var deviceMemory).ThrowOnError();

    IntPtr hostPointer = IntPtr.Zero;

    if (map)
    {
        void* pointer = null;

        try
        {
            _api.MapMemory(_device, deviceMemory, 0, blockAlignedSize, 0, ref pointer).ThrowOnError();
        }
        catch
        {
            // The memory is not tracked by any block yet, so it must be released here
            // or it would leak when mapping fails.
            _api.FreeMemory(_device, deviceMemory, null);
            throw;
        }

        hostPointer = (IntPtr)pointer;
    }

    var newBlock = new Block(deviceMemory, hostPointer, blockAlignedSize);

    InsertBlock(newBlock);

    ulong newBlockOffset = newBlock.Allocate(size, alignment);
    Debug.Assert(newBlockOffset != InvalidOffset);

    return new MemoryAllocation(this, newBlock, deviceMemory, GetHostPointer(newBlock, newBlockOffset), newBlockOffset, size);
}
/// <summary>
/// Records that a command buffer uses a range of the buffer.
/// Does nothing when granular buffer tracking is disabled.
/// </summary>
/// <param name="cbIndex">Index of the command buffer that uses the range</param>
/// <param name="offset">Offset of the used range</param>
/// <param name="size">Size of the used range</param>
public void AddBufferUse(int cbIndex, int offset, int size)
{
    if (!VulkanConfiguration.UseGranularBufferTracking)
    {
        return;
    }

    _bufferUsageBitmap.Add(cbIndex, offset, size);
}
/// <summary>
/// Checks whether waiting on the given buffer range could block, that is, whether any
/// fence currently held may cover a use of that range.
/// </summary>
/// <param name="api">Vulkan API (not used by this check)</param>
/// <param name="device">GPU device (not used by this check)</param>
/// <param name="offset">Offset of the buffer range</param>
/// <param name="size">Size of the buffer range</param>
/// <returns>False if no fence is held or no tracked use overlaps the range, true otherwise</returns>
public bool MayWait(Vk api, Device device, int offset, int size)
{
    // The fence dictionary is mutated under this lock by AddFence/RemoveFence, so the
    // count is read under the same lock rather than racing with those writers.
    lock (_fences)
    {
        if (_fences.Count == 0)
        {
            return false;
        }

        if (!VulkanConfiguration.UseGranularBufferTracking)
        {
            // Without granular tracking any held fence may cover the range.
            return true;
        }

        foreach (int ownerCbIndex in _fences.Values)
        {
            if (_bufferUsageBitmap.OverlapsWith(ownerCbIndex, offset, size))
            {
                return true;
            }
        }

        return false;
    }
}
/// <summary>
/// Fixed-length array of unmanaged values stored in memory from <see cref="Marshal.AllocHGlobal(int)"/>.
/// The memory is not zero-initialized by this class and must be released with <see cref="Dispose"/>.
/// </summary>
/// <typeparam name="T">Unmanaged element type</typeparam>
unsafe class NativeArray<T> : IDisposable where T : unmanaged
{
    /// <summary>Pointer to the first element, or null after <see cref="Dispose"/>.</summary>
    public T* Pointer { get; private set; }

    /// <summary>Number of elements the array was created with.</summary>
    public int Length { get; }

    /// <summary>
    /// Gets a reference to the element at <paramref name="index"/>.
    /// </summary>
    /// <exception cref="IndexOutOfRangeException">Thrown when the index is negative or not below <see cref="Length"/></exception>
    public ref T this[int index]
    {
        get => ref Pointer[Checked(index)];
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private int Checked(int index)
    {
        // The unsigned comparison rejects negative indices and indices >= Length in one test.
        if ((uint)index >= (uint)Length)
        {
            throw new IndexOutOfRangeException();
        }

        return index;
    }

    /// <summary>
    /// Allocates native memory for <paramref name="length"/> elements.
    /// </summary>
    /// <param name="length">Number of elements; must not be negative</param>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="length"/> is negative</exception>
    /// <exception cref="OverflowException">Thrown when the size in bytes does not fit in an int</exception>
    public NativeArray(int length)
    {
        // A negative length would otherwise reach AllocHGlobal as a negative byte count
        // and leave Length negative.
        if (length < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(length), "Length must not be negative.");
        }

        Pointer = (T*)Marshal.AllocHGlobal(checked(length * Unsafe.SizeOf<T>()));
        Length = length;
    }

    /// <summary>
    /// Creates a span over the native memory. The span must not be used after <see cref="Dispose"/>.
    /// </summary>
    public Span<T> ToSpan()
    {
        return new Span<T>(Pointer, Length);
    }

    /// <summary>
    /// Frees the native memory and clears <see cref="Pointer"/>.
    /// </summary>
    public void Dispose()
    {
        Marshal.FreeHGlobal((IntPtr)Pointer);
        Pointer = null;
    }
}
/// <summary>
/// Releases the flush storage buffer, if one was ever created.
/// </summary>
public void Dispose()
{
    // The storage is created lazily by ResizeIfNeeded, so it is still null when no data
    // was ever read back; disposing it unconditionally threw NullReferenceException.
    _flushStorage?.Dispose();
    _flushStorage = null;
}
+using System; + +namespace Ryujinx.Graphics.Vulkan +{ + class PipelineBase : IDisposable + { + public const int DescriptorSetLayouts = 4; + + public const int UniformSetIndex = 0; + public const int StorageSetIndex = 1; + public const int TextureSetIndex = 2; + public const int ImageSetIndex = 3; + + protected readonly VulkanGraphicsDevice Gd; + protected readonly Device Device; + public readonly PipelineCache PipelineCache; + + private PipelineDynamicState _dynamicState; + private PipelineState _newState; + private bool _stateDirty; + private GAL.PrimitiveTopology _topology; + + private ulong _currentPipelineHandle; + + protected Auto Pipeline; + + protected PipelineBindPoint Pbp; + + protected CommandBufferScoped Cbs; + protected CommandBufferScoped? PreloadCbs; + protected CommandBuffer CommandBuffer; + + public CommandBufferScoped CurrentCommandBuffer => Cbs; + + private ShaderCollection _program; + + private Vector4[] _renderScale = new Vector4[73]; + private int _fragmentScaleCount; + + protected FramebufferParams FramebufferParams; + private Auto _framebuffer; + private Auto _renderPass; + private int _writtenAttachmentCount; + private bool _renderPassActive; + + private readonly DescriptorSetUpdater _descriptorSetUpdater; + + private BufferState _indexBuffer; + private readonly BufferState[] _transformFeedbackBuffers; + private readonly BufferState[] _vertexBuffers; + protected Rectangle ClearScissor; + + public SupportBufferUpdater SupportBufferUpdater; + + private bool _needsIndexBufferRebind; + private bool _needsTransformFeedbackBuffersRebind; + private bool _needsVertexBuffersRebind; + + private bool _tfEnabled; + private bool _tfActive; + + public ulong DrawCount { get; private set; } + + public unsafe PipelineBase(VulkanGraphicsDevice gd, Device device) + { + Gd = gd; + Device = device; + + var pipelineCacheCreateInfo = new PipelineCacheCreateInfo() + { + SType = StructureType.PipelineCacheCreateInfo + }; + + gd.Api.CreatePipelineCache(device, 
/// <summary>
/// Records a global memory barrier on the current command buffer.
/// Both the source and destination sides use the fragment shader stage with
/// memory read and write access.
/// </summary>
public unsafe void Barrier()
{
    const AccessFlags ReadWriteAccess = AccessFlags.AccessMemoryReadBit | AccessFlags.AccessMemoryWriteBit;
    const PipelineStageFlags FragmentStage = PipelineStageFlags.PipelineStageFragmentShaderBit;

    var memoryBarrier = new MemoryBarrier
    {
        SType = StructureType.MemoryBarrier,
        SrcAccessMask = ReadWriteAccess,
        DstAccessMask = ReadWriteAccess
    };

    // One memory barrier, no buffer barriers, no image barriers.
    Gd.Api.CmdPipelineBarrier(CommandBuffer, FragmentStage, FragmentStage, 0, 1, memoryBarrier, 0, null, 0, null);
}
/// <summary>
/// Clears the depth and/or stencil aspect of the bound depth-stencil attachment.
/// </summary>
/// <param name="layer">Layer passed to <c>FramebufferParams.GetClearRect</c> to build the clear rectangle.</param>
/// <param name="depthValue">Value written to the depth aspect.</param>
/// <param name="depthMask">When true, the depth aspect is cleared.</param>
/// <param name="stencilValue">Value written to the stencil aspect.</param>
/// <param name="stencilMask">When non-zero, the stencil aspect is cleared (the mask bits themselves are not applied yet).</param>
public unsafe void ClearRenderTargetDepthStencil(int layer, float depthValue, bool depthMask, int stencilValue, int stencilMask)
{
    // TODO: Use stencilMask (fully)

    if (FramebufferParams == null || !FramebufferParams.HasDepthStencil)
    {
        return;
    }

    var flags = depthMask ? ImageAspectFlags.ImageAspectDepthBit : 0;

    if (stencilMask != 0)
    {
        flags |= ImageAspectFlags.ImageAspectStencilBit;
    }

    if (flags == 0)
    {
        // Nothing was selected for clearing. A ClearAttachment with an empty aspect mask
        // is not valid in Vulkan, so skip the command (and the render pass begin) entirely.
        return;
    }

    if (_renderPass == null)
    {
        CreateRenderPass();
    }

    BeginRenderPass();

    var clearValue = new ClearValue(null, new ClearDepthStencilValue(depthValue, (uint)stencilValue));
    var attachment = new ClearAttachment(flags, 0, clearValue);

    // FramebufferParams was null-checked above, so no null-conditional is needed here.
    var clearRect = FramebufferParams.GetClearRect(ClearScissor, layer);

    Gd.Api.CmdClearAttachments(CommandBuffer, 1, &attachment, 1, &clearRect);
}
/// <summary>
/// Records a non-indexed draw on the current command buffer.
/// Does nothing when the bound program is not linked.
/// </summary>
/// <param name="vertexCount">Number of vertices to draw.</param>
/// <param name="instanceCount">Number of instances to draw.</param>
/// <param name="firstVertex">Index of the first vertex.</param>
/// <param name="firstInstance">Index of the first instance.</param>
public void Draw(int vertexCount, int instanceCount, int firstVertex, int firstInstance)
{
    if (!_program.IsLinked)
    {
        return;
    }

    RecreatePipelineIfNeeded(PipelineBindPoint.Graphics);
    BeginRenderPass();
    ResumeTransformFeedbackInternal();
    DrawCount++;

    if (_topology != GAL.PrimitiveTopology.Quads)
    {
        Gd.Api.CmdDraw(CommandBuffer, (uint)vertexCount, (uint)instanceCount, (uint)firstVertex, (uint)firstInstance);
        return;
    }

    // Quads are submitted as one 4-vertex draw per quad; leftover vertices are ignored.
    int remainingQuads = vertexCount / 4;
    int quadFirstVertex = firstVertex;

    while (remainingQuads-- > 0)
    {
        Gd.Api.CmdDraw(CommandBuffer, 4, (uint)instanceCount, (uint)quadFirstVertex, (uint)firstInstance);
        quadFirstVertex += 4;
    }
}
/// <summary>
/// Draws a region of a texture to a destination region through <c>Gd.HelperShader.DrawTexture</c>.
/// Culling, stencil test, depth test and depth write are disabled for the draw; those values,
/// together with topology and viewports, are restored afterwards.
/// Does nothing when <paramref name="texture"/> is not a <c>TextureView</c>.
/// </summary>
/// <param name="texture">Source texture.</param>
/// <param name="sampler">Sampler forwarded to the helper shader.</param>
/// <param name="srcRegion">Source region forwarded to the helper shader.</param>
/// <param name="dstRegion">Destination region forwarded to the helper shader.</param>
public void DrawTexture(ITexture texture, ISampler sampler, Extents2DF srcRegion, Extents2DF dstRegion)
{
    if (!(texture is TextureView srcTexture))
    {
        return;
    }

    SupportBufferUpdater.Commit();

    // Snapshot every piece of state that is overridden here or restored below.
    var savedCullMode = _newState.CullMode;
    var savedStencilTestEnable = _newState.StencilTestEnable;
    var savedDepthTestEnable = _newState.DepthTestEnable;
    var savedDepthWriteEnable = _newState.DepthWriteEnable;
    var savedTopology = _newState.Topology;
    var savedViewports = VulkanConfiguration.UseDynamicState ? _dynamicState.Viewports : _newState.Internal.Viewports;
    var savedViewportsCount = _newState.ViewportsCount;

    _newState.CullMode = CullModeFlags.CullModeNone;
    _newState.StencilTestEnable = false;
    _newState.DepthTestEnable = false;
    _newState.DepthWriteEnable = false;
    SignalStateChange();

    // NOTE(review): topology and viewports are not changed in this method; presumably the
    // helper shader modifies them through this pipeline, which is why they are restored.
    Gd.HelperShader.DrawTexture(Gd, this, srcTexture, sampler, srcRegion, dstRegion);

    _newState.CullMode = savedCullMode;
    _newState.StencilTestEnable = savedStencilTestEnable;
    _newState.DepthTestEnable = savedDepthTestEnable;
    _newState.DepthWriteEnable = savedDepthWriteEnable;
    _newState.Topology = savedTopology;

    if (VulkanConfiguration.UseDynamicState)
    {
        _dynamicState.Viewports = savedViewports;
        _dynamicState.ViewportsCount = (int)savedViewportsCount;
        _dynamicState.SetViewportsDirty();
    }
    else
    {
        _newState.Internal.Viewports = savedViewports;
    }

    _newState.ViewportsCount = savedViewportsCount;
    SignalStateChange();
}
/// <summary>
/// Records an indexed indirect draw whose draw count is read from a parameter buffer.
/// Returns without drawing when the bound program did not link successfully.
/// </summary>
/// <param name="indirectBuffer">Range holding the indirect draw commands.</param>
/// <param name="parameterBuffer">Range holding the draw count.</param>
/// <param name="maxDrawCount">Upper bound on the number of draws.</param>
/// <param name="stride">Stride, in bytes, between indirect commands.</param>
/// <exception cref="NotSupportedException">The device does not support indirect parameters.</exception>
public void MultiDrawIndexedIndirectCount(BufferRange indirectBuffer, BufferRange parameterBuffer, int maxDrawCount, int stride)
{
    if (!Gd.SupportsIndirectParameters)
    {
        throw new NotSupportedException();
    }

    if (_program.LinkStatus != ProgramLinkStatus.Success)
    {
        return;
    }

    RecreatePipelineIfNeeded(PipelineBindPoint.Graphics);
    BeginRenderPass();
    ResumeTransformFeedbackInternal();
    DrawCount++;

    // Resolve the indirect command buffer first, then the count buffer.
    var indirectHolder = Gd.BufferManager.GetBuffer(CommandBuffer, indirectBuffer.Handle, true);
    var commands = indirectHolder.Get(Cbs, indirectBuffer.Offset, indirectBuffer.Size).Value;

    var parameterHolder = Gd.BufferManager.GetBuffer(CommandBuffer, parameterBuffer.Handle, true);
    var counts = parameterHolder.Get(Cbs, parameterBuffer.Offset, parameterBuffer.Size).Value;

    Gd.DrawIndirectCountApi.CmdDrawIndexedIndirectCount(
        CommandBuffer,
        commands,
        (ulong)indirectBuffer.Offset,
        counts,
        (ulong)parameterBuffer.Offset,
        (uint)maxDrawCount,
        (uint)stride);
}
/// <summary>
/// Updates the depth bias parameters and whether depth bias is enabled.
/// The numeric parameters go to the dynamic state when <c>VulkanConfiguration.UseDynamicState</c>
/// is set, otherwise to the pipeline state.
/// </summary>
/// <param name="enables">Polygon modes with depth bias enabled; any non-zero value enables it.</param>
/// <param name="factor">Slope factor.</param>
/// <param name="units">Constant factor.</param>
/// <param name="clamp">Bias clamp.</param>
public void SetDepthBias(PolygonModeMask enables, float factor, float units, float clamp)
{
    if (!VulkanConfiguration.UseDynamicState)
    {
        _newState.DepthBiasSlopeFactor = factor;
        _newState.DepthBiasConstantFactor = units;
        _newState.DepthBiasClamp = clamp;
    }
    else
    {
        _dynamicState.SetDepthBias(factor, units, clamp);
    }

    bool biasEnabled = enables != 0;

    _newState.DepthBiasEnable = biasEnabled;
    SignalStateChange();
}
/// <summary>
/// Replaces the current index buffer and binds it on the current command buffer.
/// A null handle selects <c>BufferState.Null</c>. 8-bit indices are replaced by a
/// 16-bit copy when the device lacks <c>SupportsIndexTypeUint8</c>.
/// </summary>
/// <param name="buffer">Range of the index data.</param>
/// <param name="type">Index element type.</param>
public void SetIndexBuffer(BufferRange buffer, GAL.IndexType type)
{
    _indexBuffer.Dispose();

    if (buffer.Handle == BufferHandle.Null)
    {
        _indexBuffer = BufferState.Null;
    }
    else if (type == GAL.IndexType.UByte && !Gd.SupportsIndexTypeUint8)
    {
        // The converted buffer starts at offset 0 and is twice the size of the source range.
        var widened = Gd.BufferManager.GetBufferI8ToI16(Cbs, buffer.Handle, buffer.Offset, buffer.Size);
        var widenedType = GAL.IndexType.UShort;

        _indexBuffer = new BufferState(widened, 0, buffer.Size * 2, widenedType.Convert());
    }
    else
    {
        var backing = Gd.BufferManager.GetBuffer(CommandBuffer, buffer.Handle, false);

        _indexBuffer = new BufferState(backing, buffer.Offset, buffer.Size, type.Convert());
    }

    _indexBuffer.BindIndexBuffer(Gd.Api, Cbs);
}
/// <summary>
/// Makes <paramref name="program"/> the current shader program: forwards it to the
/// descriptor set updater and copies its pipeline layout and stage infos into the
/// pending pipeline state.
/// </summary>
/// <param name="program">Program to bind; it is cast to <c>ShaderCollection</c>.</param>
public void SetProgram(IProgram program)
{
    var shaders = (ShaderCollection)program;
    var stageInfos = shaders.GetInfos();
    int stageCount = stageInfos.Length;

    _program = shaders;

    _descriptorSetUpdater.SetProgram(shaders);

    _newState.PipelineLayout = shaders.PipelineLayout;
    _newState.StagesCount = (uint)stageCount;

    // Only the first stageCount slots of the stage array are overwritten.
    var destination = _newState.Stages.ToSpan().Slice(0, stageCount);
    stageInfos.CopyTo(destination);

    SignalStateChange();
}
/// <summary>
/// Applies a stencil test descriptor to the pending pipeline state.
/// Compare masks, write masks and references go to the dynamic state when
/// <c>VulkanConfiguration.UseDynamicState</c> is set, otherwise to the pipeline state.
/// The enable flag and the stencil operations always go to the pipeline state.
/// </summary>
/// <param name="stencilTest">Stencil configuration for the front and back faces.</param>
public void SetStencilTest(StencilTestDescriptor stencilTest)
{
    uint backCompareMask = (uint)stencilTest.BackFuncMask;
    uint backWriteMask = (uint)stencilTest.BackMask;
    uint backReference = (uint)stencilTest.BackFuncRef;
    uint frontCompareMask = (uint)stencilTest.FrontFuncMask;
    uint frontWriteMask = (uint)stencilTest.FrontMask;
    uint frontReference = (uint)stencilTest.FrontFuncRef;

    if (!VulkanConfiguration.UseDynamicState)
    {
        _newState.StencilBackCompareMask = backCompareMask;
        _newState.StencilBackWriteMask = backWriteMask;
        _newState.StencilBackReference = backReference;
        _newState.StencilFrontCompareMask = frontCompareMask;
        _newState.StencilFrontWriteMask = frontWriteMask;
        _newState.StencilFrontReference = frontReference;
    }
    else
    {
        _dynamicState.SetStencilMasks(
            backCompareMask,
            backWriteMask,
            backReference,
            frontCompareMask,
            frontWriteMask,
            frontReference);
    }

    _newState.StencilTestEnable = stencilTest.TestEnable;

    // Back face operations.
    _newState.StencilBackFailOp = stencilTest.BackSFail.Convert();
    _newState.StencilBackPassOp = stencilTest.BackDpPass.Convert();
    _newState.StencilBackDepthFailOp = stencilTest.BackDpFail.Convert();
    _newState.StencilBackCompareOp = stencilTest.BackFunc.Convert();

    // Front face operations.
    _newState.StencilFrontFailOp = stencilTest.FrontSFail.Convert();
    _newState.StencilFrontPassOp = stencilTest.FrontDpPass.Convert();
    _newState.StencilFrontDepthFailOp = stencilTest.FrontDpFail.Convert();
    _newState.StencilFrontCompareOp = stencilTest.FrontFunc.Convert();

    SignalStateChange();
}
0 : attribute.BufferIndex + 1; + + _newState.Internal.VertexAttributeDescriptions[i] = new VertexInputAttributeDescription( + (uint)i, + (uint)bufferIndex, + FormatTable.GetFormat(attribute.Format), + (uint)attribute.Offset); + } + + _newState.VertexAttributeDescriptionsCount = (uint)count; + SignalStateChange(); + } + + public void SetVertexBuffers(ReadOnlySpan vertexBuffers) + { + int count = Math.Min(Constants.MaxVertexBuffers, vertexBuffers.Length); + + _newState.Internal.VertexBindingDescriptions[0] = new VertexInputBindingDescription(0, 0, VertexInputRate.Vertex); + + int validCount = 1; + + for (int i = 0; i < count; i++) + { + var vertexBuffer = vertexBuffers[i]; + + // TODO: Support divisor > 1 + var inputRate = vertexBuffer.Divisor != 0 ? VertexInputRate.Instance : VertexInputRate.Vertex; + + if (vertexBuffer.Buffer.Handle != BufferHandle.Null) + { + var vb = Gd.BufferManager.GetBuffer(CommandBuffer, vertexBuffer.Buffer.Handle, false); + if (vb != null) + { + int binding = i + 1; + int descriptorIndex = validCount++; + + _newState.Internal.VertexBindingDescriptions[descriptorIndex] = new VertexInputBindingDescription( + (uint)binding, + (uint)vertexBuffer.Stride, + inputRate); + + int vbSize = vertexBuffer.Buffer.Size; + + if (Gd.Vendor == Vendor.Amd && vertexBuffer.Stride > 0) + { + // AMD has a bug where if offset + stride * count is greater than + // the size, then the last attribute will have the wrong value. + // As a workaround, simply use the full buffer size. 
+ int remainder = vbSize % vertexBuffer.Stride; + if (remainder != 0) + { + vbSize += vertexBuffer.Stride - remainder; + } + } + + _vertexBuffers[binding].Dispose(); + _vertexBuffers[binding] = new BufferState( + vb, + vertexBuffer.Buffer.Offset, + vbSize, + (ulong)vertexBuffer.Stride); + + _vertexBuffers[binding].BindVertexBuffer(Gd, Cbs, (uint)binding); + } + } + } + + _newState.VertexBindingDescriptionsCount = (uint)validCount; + SignalStateChange(); + } + + // TODO: Remove first parameter. + public void SetViewports(int first, ReadOnlySpan viewports, bool disableTransform) + { + int maxViewports = Gd.Capabilities.SupportsMultiView ? Constants.MaxViewports : 1; + int count = Math.Min(maxViewports, viewports.Length); + + static float Clamp(float value) + { + return Math.Clamp(value, 0f, 1f); + } + + if (VulkanConfiguration.UseDynamicState) + { + for (int i = 0; i < count; i++) + { + var viewport = viewports[i]; + + _dynamicState.SetViewport(i, new Silk.NET.Vulkan.Viewport( + viewport.Region.X, + viewport.Region.Y, + viewport.Region.Width == 0f ? 1f : viewport.Region.Width, + viewport.Region.Height == 0f ? 1f : viewport.Region.Height, + Clamp(viewport.DepthNear), + Clamp(viewport.DepthFar))); + } + + _dynamicState.ViewportsCount = count; + } + else + { + for (int i = 0; i < count; i++) + { + var viewport = viewports[i]; + + ref var vkViewport = ref _newState.Internal.Viewports[i]; + + vkViewport.X = viewport.Region.X; + vkViewport.Y = viewport.Region.Y; + vkViewport.Width = viewport.Region.Width == 0f ? 1f : viewport.Region.Width; + vkViewport.Height = viewport.Region.Height == 0f ? 1f : viewport.Region.Height; + vkViewport.MinDepth = Clamp(viewport.DepthNear); + vkViewport.MaxDepth = Clamp(viewport.DepthFar); + } + } + + float disableTransformF = disableTransform ? 
/// <summary>
/// Records the barrier used between texture writes and subsequent reads.
/// The barrier recorded is exactly the one produced by <see cref="Barrier"/>
/// (fragment stage to fragment stage, memory read and write access on both sides).
/// </summary>
public void TextureBarrier()
{
    Barrier();
}
/// <summary>
/// Builds new framebuffer parameters for the given attachments and refreshes the
/// attachment formats and sample count of the pending pipeline state.
/// </summary>
/// <param name="colors">Color attachments.</param>
/// <param name="depthStencil">Depth-stencil attachment.</param>
private void CreateFramebuffer(ITexture[] colors, ITexture depthStencil)
{
    FramebufferParams = new FramebufferParams(Device, colors, depthStencil);
    UpdatePipelineAttachmentFormats();

    // The sample count comes from the first attachment; with no attachments it is 1.
    var attachmentSamples = FramebufferParams.AttachmentSamples;

    _newState.SamplesCount = attachmentSamples.Length == 0 ? 1 : attachmentSamples[0];
}
1 : 0); + for (int i = FramebufferParams.AttachmentFormats.Length; i <= maxAttachmentIndex; i++) + { + dstAttachmentFormats[i] = 0; + } + + _newState.ColorBlendAttachmentStateCount = (uint)(FramebufferParams.MaxColorAttachmentIndex + 1); + _newState.HasDepthStencil = FramebufferParams.HasDepthStencil; + } + + protected unsafe void CreateRenderPass() + { + const int MaxAttachments = Constants.MaxRenderTargets + 1; + + AttachmentDescription[] attachmentDescs = null; + + var subpass = new SubpassDescription() + { + PipelineBindPoint = PipelineBindPoint.Graphics + }; + + AttachmentReference* attachmentReferences = stackalloc AttachmentReference[MaxAttachments]; + + var hasFramebuffer = FramebufferParams != null; + + if (hasFramebuffer && FramebufferParams.AttachmentsCount != 0) + { + attachmentDescs = new AttachmentDescription[FramebufferParams.AttachmentsCount]; + + for (int i = 0; i < FramebufferParams.AttachmentsCount; i++) + { + int bindIndex = FramebufferParams.AttachmentIndices[i]; + + attachmentDescs[i] = new AttachmentDescription( + 0, + FramebufferParams.AttachmentFormats[i], + TextureStorage.ConvertToSampleCountFlags(FramebufferParams.AttachmentSamples[i]), + AttachmentLoadOp.Load, + AttachmentStoreOp.Store, + AttachmentLoadOp.Load, + AttachmentStoreOp.Store, + ImageLayout.General, + ImageLayout.General); + } + + int colorAttachmentsCount = FramebufferParams.ColorAttachmentsCount; + + if (colorAttachmentsCount > MaxAttachments - 1) + { + colorAttachmentsCount = MaxAttachments - 1; + } + + if (colorAttachmentsCount != 0) + { + int maxAttachmentIndex = FramebufferParams.MaxColorAttachmentIndex; + subpass.ColorAttachmentCount = (uint)maxAttachmentIndex + 1; + subpass.PColorAttachments = &attachmentReferences[0]; + + // Fill with VK_ATTACHMENT_UNUSED to cover any gaps. 
+ for (int i = 0; i <= maxAttachmentIndex; i++) + { + subpass.PColorAttachments[i] = new AttachmentReference(Vk.AttachmentUnused, ImageLayout.Undefined); + } + + for (int i = 0; i < colorAttachmentsCount; i++) + { + int bindIndex = FramebufferParams.AttachmentIndices[i]; + + subpass.PColorAttachments[bindIndex] = new AttachmentReference((uint)i, ImageLayout.General); + } + } + + if (FramebufferParams.HasDepthStencil) + { + uint dsIndex = (uint)FramebufferParams.AttachmentsCount - 1; + + subpass.PDepthStencilAttachment = &attachmentReferences[MaxAttachments - 1]; + *subpass.PDepthStencilAttachment = new AttachmentReference(dsIndex, ImageLayout.General); + } + } + + var subpassDependency = new SubpassDependency( + 0, + 0, + PipelineStageFlags.PipelineStageAllGraphicsBit, + PipelineStageFlags.PipelineStageAllGraphicsBit, + AccessFlags.AccessMemoryReadBit | AccessFlags.AccessMemoryWriteBit | AccessFlags.AccessColorAttachmentWriteBit, + AccessFlags.AccessMemoryReadBit | AccessFlags.AccessMemoryWriteBit | AccessFlags.AccessShaderReadBit, + 0); + + fixed (AttachmentDescription* pAttachmentDescs = attachmentDescs) + { + var renderPassCreateInfo = new RenderPassCreateInfo() + { + SType = StructureType.RenderPassCreateInfo, + PAttachments = pAttachmentDescs, + AttachmentCount = attachmentDescs != null ? (uint)attachmentDescs.Length : 0, + PSubpasses = &subpass, + SubpassCount = 1, + PDependencies = &subpassDependency, + DependencyCount = 1 + }; + + Gd.Api.CreateRenderPass(Device, renderPassCreateInfo, null, out var renderPass).ThrowOnError(); + + _renderPass?.Dispose(); + _renderPass = new Auto(new DisposableRenderPass(Gd.Api, Device, renderPass)); + } + + EndRenderPass(); + + _framebuffer?.Dispose(); + _framebuffer = hasFramebuffer ? 
/// <summary>
/// Brings the command buffer up to date before a draw or dispatch: replays dirty dynamic
/// state, commits the support buffer, recreates the pipeline when state or bind point
/// changed, rebinds index, transform feedback and vertex buffers when flagged, and finally
/// updates and binds the descriptor sets.
/// </summary>
/// <param name="pbp">Bind point (graphics or compute) of the upcoming command.</param>
private void RecreatePipelineIfNeeded(PipelineBindPoint pbp)
{
    _dynamicState.ReplayIfDirty(Gd.Api, CommandBuffer);

    // Commit changes to the support buffer before drawing.
    SupportBufferUpdater.Commit();

    // A pipeline is (re)created when any state setter signalled a change or when
    // switching between bind points.
    if (_stateDirty || Pbp != pbp)
    {
        CreatePipeline(pbp);
        _stateDirty = false;
        Pbp = pbp;
    }

    // The rebind flags below are raised by SignalCommandBufferChange.
    if (_needsIndexBufferRebind)
    {
        _indexBuffer.BindIndexBuffer(Gd.Api, Cbs);
        _needsIndexBufferRebind = false;
    }

    if (_needsTransformFeedbackBuffersRebind)
    {
        // Transform feedback is paused before its buffers are bound again.
        PauseTransformFeedbackInternal();

        for (int i = 0; i < Constants.MaxTransformFeedbackBuffers; i++)
        {
            _transformFeedbackBuffers[i].BindTransformFeedbackBuffer(Gd, Cbs, (uint)i);
        }

        _needsTransformFeedbackBuffersRebind = false;
    }

    if (_needsVertexBuffersRebind)
    {
        // The vertex buffer array has MaxVertexBuffers + 1 entries (see the constructor),
        // hence the + 1 in the loop bound.
        for (int i = 0; i < Constants.MaxVertexBuffers + 1; i++)
        {
            _vertexBuffers[i].BindVertexBuffer(Gd, Cbs, (uint)i);
        }

        _needsVertexBuffersRebind = false;
    }

    _descriptorSetUpdater.UpdateAndBindDescriptorSets(Cbs, pbp);
}
/// <summary>
/// Begins the current render pass on the command buffer unless one is already active.
/// The render area starts at the default offset and spans the framebuffer width and height.
/// </summary>
private unsafe void BeginRenderPass()
{
    if (_renderPassActive)
    {
        return;
    }

    var framebufferExtent = new Extent2D(FramebufferParams.Width, FramebufferParams.Height);
    var renderArea = new Rect2D(null, framebufferExtent);
    var clearValue = new ClearValue();

    var beginInfo = new RenderPassBeginInfo
    {
        SType = StructureType.RenderPassBeginInfo,
        RenderPass = _renderPass.Get(Cbs).Value,
        Framebuffer = _framebuffer.Get(Cbs).Value,
        RenderArea = renderArea,
        PClearValues = &clearValue,
        ClearValueCount = 1
    };

    Gd.Api.CmdBeginRenderPass(CommandBuffer, beginInfo, SubpassContents.Inline);
    _renderPassActive = true;
}
/// <summary>
/// Releases the resources owned by the pipeline: render pass, framebuffer, buffer states,
/// pipeline state, descriptor set updater, current pipeline, pipeline cache and support buffer.
/// </summary>
/// <param name="disposing">When false nothing is released.</param>
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    _renderPass?.Dispose();
    _framebuffer?.Dispose();
    _indexBuffer.Dispose();
    _newState.Dispose();
    _descriptorSetUpdater.Dispose();

    // Elements are disposed in place through the array indexer.
    for (int slot = 0; slot < _vertexBuffers.Length; slot++)
    {
        _vertexBuffers[slot].Dispose();
    }

    for (int slot = 0; slot < _transformFeedbackBuffers.Length; slot++)
    {
        _transformFeedbackBuffers[slot].Dispose();
    }

    Pipeline?.Dispose();

    unsafe
    {
        Gd.Api.DestroyPipelineCache(Device, PipelineCache, null);
    }

    SupportBufferUpdater.Dispose();
}
gd.FormatCapabilities.ConvertToVkFormat(state.AttachmentFormats[i]); + + attachmentIndices[attachmentCount++] = i; + colorCount++; + } + } + + if (state.DepthStencilEnable) + { + attachmentFormats[attachmentCount++] = gd.FormatCapabilities.ConvertToVkFormat(state.DepthStencilFormat); + } + + if (attachmentCount != 0) + { + attachmentDescs = new AttachmentDescription[attachmentCount]; + + for (int i = 0; i < attachmentCount; i++) + { + int bindIndex = attachmentIndices[i]; + + attachmentDescs[i] = new AttachmentDescription( + 0, + attachmentFormats[i], + TextureStorage.ConvertToSampleCountFlags((uint)state.SamplesCount), + AttachmentLoadOp.Load, + AttachmentStoreOp.Store, + AttachmentLoadOp.Load, + AttachmentStoreOp.Store, + ImageLayout.General, + ImageLayout.General); + } + + int colorAttachmentsCount = colorCount; + + if (colorAttachmentsCount > MaxAttachments - 1) + { + colorAttachmentsCount = MaxAttachments - 1; + } + + if (colorAttachmentsCount != 0) + { + int maxAttachmentIndex = Constants.MaxRenderTargets - 1; + subpass.ColorAttachmentCount = (uint)maxAttachmentIndex + 1; + subpass.PColorAttachments = &attachmentReferences[0]; + + // Fill with VK_ATTACHMENT_UNUSED to cover any gaps. 
+ for (int i = 0; i <= maxAttachmentIndex; i++) + { + subpass.PColorAttachments[i] = new AttachmentReference(Vk.AttachmentUnused, ImageLayout.Undefined); + } + + for (int i = 0; i < colorAttachmentsCount; i++) + { + int bindIndex = attachmentIndices[i]; + + subpass.PColorAttachments[bindIndex] = new AttachmentReference((uint)i, ImageLayout.General); + } + } + + if (state.DepthStencilEnable) + { + uint dsIndex = (uint)attachmentCount - 1; + + subpass.PDepthStencilAttachment = &attachmentReferences[MaxAttachments - 1]; + *subpass.PDepthStencilAttachment = new AttachmentReference(dsIndex, ImageLayout.General); + } + } + + var subpassDependency = new SubpassDependency( + 0, + 0, + PipelineStageFlags.PipelineStageAllGraphicsBit, + PipelineStageFlags.PipelineStageAllGraphicsBit, + AccessFlags.AccessMemoryReadBit | AccessFlags.AccessMemoryWriteBit, + AccessFlags.AccessMemoryReadBit | AccessFlags.AccessMemoryWriteBit, + 0); + + fixed (AttachmentDescription* pAttachmentDescs = attachmentDescs) + { + var renderPassCreateInfo = new RenderPassCreateInfo() + { + SType = StructureType.RenderPassCreateInfo, + PAttachments = pAttachmentDescs, + AttachmentCount = attachmentDescs != null ? (uint)attachmentDescs.Length : 0, + PSubpasses = &subpass, + SubpassCount = 1, + PDependencies = &subpassDependency, + DependencyCount = 1 + }; + + gd.Api.CreateRenderPass(device, renderPassCreateInfo, null, out var renderPass).ThrowOnError(); + + return new DisposableRenderPass(gd.Api, device, renderPass); + } + } + + public static PipelineState ToVulkanPipelineState(this ProgramPipelineState state, VulkanGraphicsDevice gd) + { + PipelineState pipeline = new PipelineState(); + pipeline.Initialize(); + + // It is assumed that Dynamic State is enabled when this conversion is used. 
+ + pipeline.BlendConstantA = state.BlendDescriptors[0].BlendConstant.Alpha; + pipeline.BlendConstantB = state.BlendDescriptors[0].BlendConstant.Blue; + pipeline.BlendConstantG = state.BlendDescriptors[0].BlendConstant.Green; + pipeline.BlendConstantR = state.BlendDescriptors[0].BlendConstant.Red; + + pipeline.CullMode = state.CullEnable ? state.CullMode.Convert() : CullModeFlags.CullModeNone; + + pipeline.DepthBoundsTestEnable = false; // Not implemented. + + pipeline.DepthClampEnable = state.DepthClampEnable; + + pipeline.DepthTestEnable = state.DepthTest.TestEnable; + pipeline.DepthWriteEnable = state.DepthTest.WriteEnable; + pipeline.DepthCompareOp = state.DepthTest.Func.Convert(); + + pipeline.FrontFace = state.FrontFace.Convert(); + + pipeline.HasDepthStencil = state.DepthStencilEnable; + pipeline.LineWidth = state.LineWidth; + pipeline.LogicOpEnable = state.LogicOpEnable; + pipeline.LogicOp = state.LogicOp.Convert(); + + pipeline.MinDepthBounds = 0f; // Not implemented. + pipeline.MaxDepthBounds = 0f; // Not implemented. + + pipeline.PatchControlPoints = state.PatchControlPoints; + pipeline.PolygonMode = Silk.NET.Vulkan.PolygonMode.Fill; // Not implemented. + pipeline.PrimitiveRestartEnable = state.PrimitiveRestartEnable; + pipeline.RasterizerDiscardEnable = state.RasterizerDiscard; + pipeline.SamplesCount = (uint)state.SamplesCount; + + if (gd.Capabilities.SupportsMultiView) + { + pipeline.ScissorsCount = Constants.MaxViewports; + pipeline.ViewportsCount = Constants.MaxViewports; + } + else + { + pipeline.ScissorsCount = 1; + pipeline.ViewportsCount = 1; + } + + pipeline.DepthBiasEnable = state.BiasEnable != 0; + + // Stencil masks and ref are dynamic, so are 0 in the Vulkan pipeline. 
+ + pipeline.StencilFrontFailOp = state.StencilTest.FrontSFail.Convert(); + pipeline.StencilFrontPassOp = state.StencilTest.FrontDpPass.Convert(); + pipeline.StencilFrontDepthFailOp = state.StencilTest.FrontDpFail.Convert(); + pipeline.StencilFrontCompareOp = state.StencilTest.FrontFunc.Convert(); + pipeline.StencilFrontCompareMask = 0; + pipeline.StencilFrontWriteMask = 0; + pipeline.StencilFrontReference = 0; + + pipeline.StencilBackFailOp = state.StencilTest.BackSFail.Convert(); + pipeline.StencilBackPassOp = state.StencilTest.BackDpPass.Convert(); + pipeline.StencilBackDepthFailOp = state.StencilTest.BackDpFail.Convert(); + pipeline.StencilBackCompareOp = state.StencilTest.BackFunc.Convert(); + pipeline.StencilBackCompareMask = 0; + pipeline.StencilBackWriteMask = 0; + pipeline.StencilBackReference = 0; + + pipeline.StencilTestEnable = state.StencilTest.TestEnable; + + pipeline.Topology = state.Topology.Convert(); + + int vaCount = Math.Min(Constants.MaxVertexAttributes, state.VertexAttribCount); + + for (int i = 0; i < vaCount; i++) + { + var attribute = state.VertexAttribs[i]; + var bufferIndex = attribute.IsZero ? 0 : attribute.BufferIndex + 1; + + pipeline.Internal.VertexAttributeDescriptions[i] = new VertexInputAttributeDescription( + (uint)i, + (uint)bufferIndex, + FormatTable.GetFormat(attribute.Format), + (uint)attribute.Offset); + } + + int descriptorIndex = 1; + pipeline.Internal.VertexBindingDescriptions[0] = new VertexInputBindingDescription(0, 0, VertexInputRate.Vertex); + + int vbCount = Math.Min(Constants.MaxVertexBuffers, state.VertexBufferCount); + + for (int i = 0; i < vbCount; i++) + { + var vertexBuffer = state.VertexBuffers[i]; + + if (vertexBuffer.Enable) + { + var inputRate = vertexBuffer.Divisor != 0 ? 
VertexInputRate.Instance : VertexInputRate.Vertex; + + // TODO: Support divisor > 1 + pipeline.Internal.VertexBindingDescriptions[descriptorIndex++] = new VertexInputBindingDescription( + (uint)i + 1, + (uint)vertexBuffer.Stride, + inputRate); + } + } + + pipeline.VertexBindingDescriptionsCount = (uint)descriptorIndex; + + // NOTE: Viewports, Scissors are dynamic. + + for (int i = 0; i < 8; i++) + { + var blend = state.BlendDescriptors[i]; + + pipeline.Internal.ColorBlendAttachmentState[i] = new PipelineColorBlendAttachmentState( + blend.Enable, + blend.ColorSrcFactor.Convert(), + blend.ColorDstFactor.Convert(), + blend.ColorOp.Convert(), + blend.AlphaSrcFactor.Convert(), + blend.AlphaDstFactor.Convert(), + blend.AlphaOp.Convert(), + (ColorComponentFlags)state.ColorWriteMask[i]); + } + + int maxAttachmentIndex = 0; + for (int i = 0; i < 8; i++) + { + if (state.AttachmentEnable[i]) + { + pipeline.Internal.AttachmentFormats[maxAttachmentIndex++] = gd.FormatCapabilities.ConvertToVkFormat(state.AttachmentFormats[i]); + } + } + + if (state.DepthStencilEnable) + { + pipeline.Internal.AttachmentFormats[maxAttachmentIndex++] = gd.FormatCapabilities.ConvertToVkFormat(state.DepthStencilFormat); + } + + pipeline.ColorBlendAttachmentStateCount = 8; + pipeline.VertexAttributeDescriptionsCount = (uint)Math.Min(Constants.MaxVertexAttributes, state.VertexAttribCount); + + return pipeline; + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/PipelineDynamicState.cs b/Ryujinx.Graphics.Vulkan/PipelineDynamicState.cs new file mode 100644 index 000000000..2553101d7 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/PipelineDynamicState.cs @@ -0,0 +1,138 @@ +using Ryujinx.Common.Memory; +using Silk.NET.Vulkan; + +namespace Ryujinx.Graphics.Vulkan +{ + struct PipelineDynamicState + { + private float _depthBiasSlopeFactor; + private float _depthBiasConstantFactor; + private float _depthBiasClamp; + + public int ScissorsCount; + private Array16 _scissors; + + private uint _backCompareMask; + private 
uint _backWriteMask; + private uint _backReference; + private uint _frontCompareMask; + private uint _frontWriteMask; + private uint _frontReference; + + public int ViewportsCount; + public Array16 Viewports; + + private enum DirtyFlags + { + None = 0, + DepthBias = 1 << 0, + Scissor = 1 << 1, + Stencil = 1 << 2, + Viewport = 1 << 3, + All = DepthBias | Scissor | Stencil | Viewport + } + + private DirtyFlags _dirty; + + public void SetDepthBias(float slopeFactor, float constantFactor, float clamp) + { + _depthBiasSlopeFactor = slopeFactor; + _depthBiasConstantFactor = constantFactor; + _depthBiasClamp = clamp; + + _dirty |= DirtyFlags.DepthBias; + } + + public void SetScissor(int index, Rect2D scissor) + { + _scissors[index] = scissor; + + _dirty |= DirtyFlags.Scissor; + } + + public void SetStencilMasks( + uint backCompareMask, + uint backWriteMask, + uint backReference, + uint frontCompareMask, + uint frontWriteMask, + uint frontReference) + { + _backCompareMask = backCompareMask; + _backWriteMask = backWriteMask; + _backReference = backReference; + _frontCompareMask = frontCompareMask; + _frontWriteMask = frontWriteMask; + _frontReference = frontReference; + + _dirty |= DirtyFlags.Stencil; + } + + public void SetViewport(int index, Viewport viewport) + { + Viewports[index] = viewport; + + _dirty |= DirtyFlags.Viewport; + } + + public void SetViewportsDirty() + { + _dirty |= DirtyFlags.Viewport; + } + + public void ForceAllDirty() + { + _dirty = DirtyFlags.All; + } + + public void ReplayIfDirty(Vk api, CommandBuffer commandBuffer) + { + if (_dirty.HasFlag(DirtyFlags.DepthBias)) + { + RecordDepthBias(api, commandBuffer); + } + + if (_dirty.HasFlag(DirtyFlags.Scissor)) + { + RecordScissor(api, commandBuffer); + } + + if (_dirty.HasFlag(DirtyFlags.Stencil)) + { + RecordStencilMasks(api, commandBuffer); + } + + if (_dirty.HasFlag(DirtyFlags.Viewport)) + { + RecordViewport(api, commandBuffer); + } + + _dirty = DirtyFlags.None; + } + + private void RecordDepthBias(Vk 
api, CommandBuffer commandBuffer) + { + api.CmdSetDepthBias(commandBuffer, _depthBiasConstantFactor, _depthBiasClamp, _depthBiasSlopeFactor); + } + + private void RecordScissor(Vk api, CommandBuffer commandBuffer) + { + api.CmdSetScissor(commandBuffer, 0, (uint)ScissorsCount, _scissors.ToSpan()); + } + + private void RecordStencilMasks(Vk api, CommandBuffer commandBuffer) + { + api.CmdSetStencilCompareMask(commandBuffer, StencilFaceFlags.StencilFaceBackBit, _backCompareMask); + api.CmdSetStencilWriteMask(commandBuffer, StencilFaceFlags.StencilFaceBackBit, _backWriteMask); + api.CmdSetStencilReference(commandBuffer, StencilFaceFlags.StencilFaceBackBit, _backReference); + api.CmdSetStencilCompareMask(commandBuffer, StencilFaceFlags.StencilFaceFrontBit, _frontCompareMask); + api.CmdSetStencilWriteMask(commandBuffer, StencilFaceFlags.StencilFaceFrontBit, _frontWriteMask); + api.CmdSetStencilReference(commandBuffer, StencilFaceFlags.StencilFaceFrontBit, _frontReference); + } + + private void RecordViewport(Vk api, CommandBuffer commandBuffer) + { + api.CmdSetViewport(commandBuffer, 0, (uint)ViewportsCount, Viewports.ToSpan()); + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/PipelineFull.cs b/Ryujinx.Graphics.Vulkan/PipelineFull.cs new file mode 100644 index 000000000..1b8b12880 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/PipelineFull.cs @@ -0,0 +1,260 @@ +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Vulkan.Queries; +using Silk.NET.Vulkan; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Vulkan +{ + class PipelineFull : PipelineBase, IPipeline + { + private bool _hasPendingQuery; + + private readonly List _activeQueries; + private CounterQueueEvent _activeConditionalRender; + + private readonly List _pendingQueryCopies; + private readonly List _pendingQueryResets; + + public PipelineFull(VulkanGraphicsDevice gd, Device device) : base(gd, device) + { + _activeQueries = new List(); + _pendingQueryCopies = new(); + _pendingQueryResets = 
new List(); + + CommandBuffer = (Cbs = gd.CommandBufferPool.Rent()).CommandBuffer; + } + + private void CopyPendingQuery() + { + foreach (var query in _pendingQueryCopies) + { + query.PoolCopy(Cbs); + } + + lock (_pendingQueryResets) + { + foreach (var query in _pendingQueryResets) + { + query.PoolReset(CommandBuffer); + } + + _pendingQueryResets.Clear(); + } + + _pendingQueryCopies.Clear(); + } + + public void ClearRenderTargetColor(int index, int layer, uint componentMask, ColorF color) + { + if (FramebufferParams == null) + { + return; + } + + if (componentMask != 0xf) + { + // We can't use CmdClearAttachments if not writing all components, + // because on Vulkan, the pipeline state does not affect clears. + var dstTexture = FramebufferParams.GetAttachment(index); + if (dstTexture == null) + { + return; + } + + Span clearColor = stackalloc float[4]; + clearColor[0] = color.Red; + clearColor[1] = color.Green; + clearColor[2] = color.Blue; + clearColor[3] = color.Alpha; + + // TODO: Clear only the specified layer. + Gd.HelperShader.Clear( + Gd, + dstTexture, + clearColor, + componentMask, + (int)FramebufferParams.Width, + (int)FramebufferParams.Height, + FramebufferParams.AttachmentFormats[index], + ClearScissor); + } + else + { + ClearRenderTargetColor(index, layer, color); + } + } + + public void EndHostConditionalRendering() + { + if (Gd.Capabilities.SupportsConditionalRendering) + { + // Gd.ConditionalRenderingApi.CmdEndConditionalRendering(CommandBuffer); + } + else + { + // throw new NotSupportedException(); + } + + _activeConditionalRender?.ReleaseHostAccess(); + _activeConditionalRender = null; + } + + public bool TryHostConditionalRendering(ICounterEvent value, ulong compare, bool isEqual) + { + // Compare an event and a constant value. + if (value is CounterQueueEvent evt) + { + // Easy host conditional rendering when the check matches what GL can do: + // - Event is of type samples passed. + // - Result is not a combination of multiple queries. 
+ // - Comparing against 0. + // - Event has not already been flushed. + + if (compare == 0 && evt.Type == CounterType.SamplesPassed && evt.ClearCounter) + { + if (!value.ReserveForHostAccess()) + { + // If the event has been flushed, then just use the values on the CPU. + // The query object may already be repurposed for another draw (eg. begin + end). + return false; + } + + if (Gd.Capabilities.SupportsConditionalRendering) + { + var buffer = evt.GetBuffer().Get(Cbs, 0, sizeof(long)).Value; + var flags = isEqual ? ConditionalRenderingFlagsEXT.ConditionalRenderingInvertedBitExt : 0; + + var conditionalRenderingBeginInfo = new ConditionalRenderingBeginInfoEXT() + { + SType = StructureType.ConditionalRenderingBeginInfoExt, + Buffer = buffer, + Flags = flags + }; + + // Gd.ConditionalRenderingApi.CmdBeginConditionalRendering(CommandBuffer, conditionalRenderingBeginInfo); + } + + _activeConditionalRender = evt; + return true; + } + } + + // The GPU will flush the queries to CPU and evaluate the condition there instead. + + FlushPendingQuery(); // The thread will be stalled manually flushing the counter, so flush commands now. + return false; + } + + public bool TryHostConditionalRendering(ICounterEvent value, ICounterEvent compare, bool isEqual) + { + FlushPendingQuery(); // The thread will be stalled manually flushing the counter, so flush commands now. 
+ return false; + } + + private void FlushPendingQuery() + { + if (_hasPendingQuery) + { + _hasPendingQuery = false; + FlushCommandsImpl(); + } + } + + public CommandBufferScoped GetPreloadCommandBuffer() + { + if (PreloadCbs == null) + { + PreloadCbs = Gd.CommandBufferPool.Rent(); + } + + return PreloadCbs.Value; + } + + public void FlushCommandsImpl([System.Runtime.CompilerServices.CallerMemberName] string caller = "") + { + // System.Console.WriteLine("flush by " + caller); + + EndRenderPass(); + + foreach (var queryPool in _activeQueries) + { + Gd.Api.CmdEndQuery(CommandBuffer, queryPool, 0); + } + + if (PreloadCbs != null) + { + PreloadCbs.Value.Dispose(); + PreloadCbs = null; + } + + CommandBuffer = (Cbs = Gd.CommandBufferPool.ReturnAndRent(Cbs)).CommandBuffer; + + // Restore per-command buffer state. + + if (Pipeline != null) + { + Gd.Api.CmdBindPipeline(CommandBuffer, Pbp, Pipeline.Get(Cbs).Value); + } + + foreach (var queryPool in _activeQueries) + { + Gd.Api.CmdResetQueryPool(CommandBuffer, queryPool, 0, 1); + Gd.Api.CmdBeginQuery(CommandBuffer, queryPool, 0, 0); + } + + SignalCommandBufferChange(); + } + + public void BeginQuery(BufferedQuery query, QueryPool pool, bool needsReset) + { + if (needsReset) + { + EndRenderPass(); + + Gd.Api.CmdResetQueryPool(CommandBuffer, pool, 0, 1); + + lock (_pendingQueryResets) + { + _pendingQueryResets.Remove(query); // Might be present on here. 
+ } + } + + Gd.Api.CmdBeginQuery(CommandBuffer, pool, 0, 0); + + _activeQueries.Add(pool); + } + + public void EndQuery(QueryPool pool) + { + Gd.Api.CmdEndQuery(CommandBuffer, pool, 0); + + _activeQueries.Remove(pool); + } + + public void ResetQuery(BufferedQuery query) + { + lock (_pendingQueryResets) + { + _pendingQueryResets.Add(query); + } + } + + public void CopyQueryResults(BufferedQuery query) + { + _pendingQueryCopies.Add(query); + + _hasPendingQuery = true; + } + + protected override void SignalAttachmentChange() + { + FlushPendingQuery(); + } + + protected override void SignalRenderPassEnd() + { + CopyPendingQuery(); + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/PipelineHelperShader.cs b/Ryujinx.Graphics.Vulkan/PipelineHelperShader.cs new file mode 100644 index 000000000..f0126a420 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/PipelineHelperShader.cs @@ -0,0 +1,44 @@ +using Silk.NET.Vulkan; +using VkFormat = Silk.NET.Vulkan.Format; + +namespace Ryujinx.Graphics.Vulkan +{ + class PipelineHelperShader : PipelineBase + { + public PipelineHelperShader(VulkanGraphicsDevice gd, Device device) : base(gd, device) + { + } + + public void SetRenderTarget(Auto view, uint width, uint height, bool isDepthStencil, VkFormat format) + { + CreateFramebuffer(view, width, height, isDepthStencil, format); + CreateRenderPass(); + SignalStateChange(); + } + + private void CreateFramebuffer(Auto view, uint width, uint height, bool isDepthStencil, VkFormat format) + { + FramebufferParams = new FramebufferParams(Device, view, width, height, isDepthStencil, format); + UpdatePipelineAttachmentFormats(); + } + + public void SetCommandBuffer(CommandBufferScoped cbs) + { + CommandBuffer = (Cbs = cbs).CommandBuffer; + + // Restore per-command buffer state. 
+ + if (Pipeline != null) + { + Gd.Api.CmdBindPipeline(CommandBuffer, Pbp, Pipeline.Get(CurrentCommandBuffer).Value); + } + + SignalCommandBufferChange(); + } + + public void Finish() + { + EndRenderPass(); + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/PipelineLayoutCache.cs b/Ryujinx.Graphics.Vulkan/PipelineLayoutCache.cs new file mode 100644 index 000000000..096be455e --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/PipelineLayoutCache.cs @@ -0,0 +1,58 @@ +using Ryujinx.Graphics.GAL; +using Silk.NET.Vulkan; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Vulkan +{ + class PipelineLayoutCache + { + private readonly PipelineLayoutCacheEntry[] _plce; + private readonly List _plceMinimal; + + public PipelineLayoutCache() + { + _plce = new PipelineLayoutCacheEntry[1 << Constants.MaxShaderStages]; + _plceMinimal = new List(); + } + + public PipelineLayoutCacheEntry Create(VulkanGraphicsDevice gd, Device device, ShaderSource[] shaders) + { + var plce = new PipelineLayoutCacheEntry(gd, device, shaders); + _plceMinimal.Add(plce); + return plce; + } + + public PipelineLayoutCacheEntry GetOrCreate(VulkanGraphicsDevice gd, Device device, uint stages, bool usePd) + { + if (_plce[stages] == null) + { + _plce[stages] = new PipelineLayoutCacheEntry(gd, device, stages, usePd); + } + + return _plce[stages]; + } + + protected virtual unsafe void Dispose(bool disposing) + { + if (disposing) + { + for (int i = 0; i < _plce.Length; i++) + { + _plce[i]?.Dispose(); + } + + foreach (var plce in _plceMinimal) + { + plce.Dispose(); + } + + _plceMinimal.Clear(); + } + } + + public void Dispose() + { + Dispose(true); + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/PipelineLayoutCacheEntry.cs b/Ryujinx.Graphics.Vulkan/PipelineLayoutCacheEntry.cs new file mode 100644 index 000000000..e25c311fb --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/PipelineLayoutCacheEntry.cs @@ -0,0 +1,112 @@ +using Ryujinx.Graphics.GAL; +using Silk.NET.Vulkan; +using System.Collections.Generic; + 
namespace Ryujinx.Graphics.Vulkan
{
    // A pipeline layout together with its descriptor set layouts and a cache of
    // descriptor set collections, kept per command buffer index and per set index.
    // NOTE(review): the generic arguments of List<>/Auto<> were lost when this text was
    // extracted; Auto<DescriptorSetCollection> is presumed from the method name below —
    // confirm against DescriptorSetManager.AllocateDescriptorSet.
    class PipelineLayoutCacheEntry
    {
        private readonly VulkanGraphicsDevice _gd;
        private readonly Device _device;

        public DescriptorSetLayout[] DescriptorSetLayouts { get; }
        public PipelineLayout PipelineLayout { get; }

        private readonly List<Auto<DescriptorSetCollection>>[][] _dsCache;
        private readonly int[] _dsCacheCursor;
        private int _dsLastCbIndex;

        private PipelineLayoutCacheEntry(VulkanGraphicsDevice gd, Device device)
        {
            _gd = gd;
            _device = device;

            _dsCache = new List<Auto<DescriptorSetCollection>>[CommandBufferPool.MaxCommandBuffers][];

            for (int i = 0; i < CommandBufferPool.MaxCommandBuffers; i++)
            {
                _dsCache[i] = new List<Auto<DescriptorSetCollection>>[PipelineBase.DescriptorSetLayouts];

                for (int j = 0; j < PipelineBase.DescriptorSetLayouts; j++)
                {
                    _dsCache[i][j] = new List<Auto<DescriptorSetCollection>>();
                }
            }

            _dsCacheCursor = new int[PipelineBase.DescriptorSetLayouts];
        }

        public PipelineLayoutCacheEntry(VulkanGraphicsDevice gd, Device device, uint stages, bool usePd) : this(gd, device)
        {
            DescriptorSetLayouts = PipelineLayoutFactory.Create(gd, device, stages, usePd, out var pipelineLayout);
            PipelineLayout = pipelineLayout;
        }

        public PipelineLayoutCacheEntry(VulkanGraphicsDevice gd, Device device, ShaderSource[] shaders) : this(gd, device)
        {
            DescriptorSetLayouts = PipelineLayoutFactory.CreateMinimal(gd, device, shaders, out var pipelineLayout);
            PipelineLayout = pipelineLayout;
        }

        // Returns the next descriptor set collection for the given command buffer and set
        // index, allocating a new one (isNew = true) only when the cached list is exhausted.
        public Auto<DescriptorSetCollection> GetNewDescriptorSetCollection(
            VulkanGraphicsDevice gd,
            int commandBufferIndex,
            int setIndex,
            out bool isNew)
        {
            // Switching to a different command buffer index rewinds every per-set cursor.
            if (_dsLastCbIndex != commandBufferIndex)
            {
                _dsLastCbIndex = commandBufferIndex;

                for (int i = 0; i < PipelineBase.DescriptorSetLayouts; i++)
                {
                    _dsCacheCursor[i] = 0;
                }
            }

            var list = _dsCache[commandBufferIndex][setIndex];
            int index = _dsCacheCursor[setIndex]++;
            if (index == list.Count)
            {
                var dsc = gd.DescriptorSetManager.AllocateDescriptorSet(gd.Api, DescriptorSetLayouts[setIndex]);
                list.Add(dsc);
                isNew = true;
                return dsc;
            }

            isNew = false;
            return list[index];
        }

        // Disposes every cached descriptor set collection, then destroys the
        // pipeline layout and the descriptor set layouts.
        protected virtual unsafe void Dispose(bool disposing)
        {
            if (disposing)
            {
                for (int i = 0; i < _dsCache.Length; i++)
                {
                    for (int j = 0; j < _dsCache[i].Length; j++)
                    {
                        for (int k = 0; k < _dsCache[i][j].Count; k++)
                        {
                            _dsCache[i][j][k].Dispose();
                        }

                        _dsCache[i][j].Clear();
                    }
                }

                _gd.Api.DestroyPipelineLayout(_device, PipelineLayout, null);

                for (int i = 0; i < DescriptorSetLayouts.Length; i++)
                {
                    _gd.Api.DestroyDescriptorSetLayout(_device, DescriptorSetLayouts[i], null);
                }
            }
        }

        public void Dispose()
        {
            Dispose(true);
        }
    }
}
diff --git a/Ryujinx.Graphics.Vulkan/PipelineLayoutFactory.cs b/Ryujinx.Graphics.Vulkan/PipelineLayoutFactory.cs
new file mode 100644
index 000000000..d37a4bc14
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/PipelineLayoutFactory.cs
@@ -0,0 +1,253 @@
using Ryujinx.Graphics.GAL;
using Silk.NET.Vulkan;
using System.Collections.Generic;
using System.Numerics;

namespace Ryujinx.Graphics.Vulkan
{
    // Builds the four descriptor set layouts (uniform, storage, texture, image)
    // and the pipeline layout that combines them.
    static class PipelineLayoutFactory
    {
        private const ShaderStageFlags SupportBufferStages =
            ShaderStageFlags.ShaderStageVertexBit |
            ShaderStageFlags.ShaderStageFragmentBit |
            ShaderStageFlags.ShaderStageComputeBit;

        // Creates layouts with the maximum number of bindings for every stage set in the
        // "stages" bitmask. With usePd, the uniform set layout is created with the
        // push-descriptor flag. Throws (via ThrowOnError) when a Vulkan create call fails.
        public static unsafe DescriptorSetLayout[] Create(VulkanGraphicsDevice gd, Device device, uint stages, bool usePd, out PipelineLayout layout)
        {
            int stagesCount = BitOperations.PopCount(stages);

            // +1 on uniforms for the support buffer at binding 0; textures and images are
            // doubled because each has a regular and a texel-buffer variant.
            int uCount = Constants.MaxUniformBuffersPerStage * stagesCount + 1;
            int tCount = Constants.MaxTexturesPerStage * 2 * stagesCount;
            int iCount = Constants.MaxImagesPerStage * 2 * stagesCount;

            DescriptorSetLayoutBinding* uLayoutBindings = stackalloc DescriptorSetLayoutBinding[uCount];
            DescriptorSetLayoutBinding* sLayoutBindings = stackalloc DescriptorSetLayoutBinding[stagesCount];
            DescriptorSetLayoutBinding* tLayoutBindings = stackalloc DescriptorSetLayoutBinding[tCount];
            DescriptorSetLayoutBinding* iLayoutBindings = stackalloc DescriptorSetLayoutBinding[iCount];

            uLayoutBindings[0] = new DescriptorSetLayoutBinding
            {
                Binding = 0,
                DescriptorType = DescriptorType.UniformBuffer,
                DescriptorCount = 1,
                StageFlags = SupportBufferStages
            };

            // "iter" counts the stages processed so far (dense array position), while
            // "stage" is the bit index and determines the binding numbers.
            int iter = 0;

            while (stages != 0)
            {
                int stage = BitOperations.TrailingZeroCount(stages);
                stages &= ~(1u << stage);

                var stageFlags = stage switch
                {
                    1 => ShaderStageFlags.ShaderStageFragmentBit,
                    2 => ShaderStageFlags.ShaderStageGeometryBit,
                    3 => ShaderStageFlags.ShaderStageTessellationControlBit,
                    4 => ShaderStageFlags.ShaderStageTessellationEvaluationBit,
                    _ => ShaderStageFlags.ShaderStageVertexBit | ShaderStageFlags.ShaderStageComputeBit
                };

                // Writes maxPerStage single-descriptor bindings for the current stage.
                void Set(DescriptorSetLayoutBinding* bindings, int maxPerStage, DescriptorType type, int start, int skip)
                {
                    int totalPerStage = maxPerStage * skip;

                    for (int i = 0; i < maxPerStage; i++)
                    {
                        bindings[start + iter * totalPerStage + i] = new DescriptorSetLayoutBinding
                        {
                            Binding = (uint)(start + stage * totalPerStage + i),
                            DescriptorType = type,
                            DescriptorCount = 1,
                            StageFlags = stageFlags
                        };
                    }
                }

                // Storage buffers use one array binding per stage instead of one binding each.
                void SetStorage(DescriptorSetLayoutBinding* bindings, int maxPerStage, int start = 0)
                {
                    bindings[start + iter] = new DescriptorSetLayoutBinding
                    {
                        Binding = (uint)(start + stage * maxPerStage),
                        DescriptorType = DescriptorType.StorageBuffer,
                        DescriptorCount = (uint)maxPerStage,
                        StageFlags = stageFlags
                    };
                }

                Set(uLayoutBindings, Constants.MaxUniformBuffersPerStage, DescriptorType.UniformBuffer, 1, 1);
                SetStorage(sLayoutBindings, Constants.MaxStorageBuffersPerStage);
                Set(tLayoutBindings, Constants.MaxTexturesPerStage, DescriptorType.CombinedImageSampler, 0, 2);
                Set(tLayoutBindings, Constants.MaxTexturesPerStage, DescriptorType.UniformTexelBuffer, Constants.MaxTexturesPerStage, 2);
                Set(iLayoutBindings, Constants.MaxImagesPerStage, DescriptorType.StorageImage, 0, 2);
                Set(iLayoutBindings, Constants.MaxImagesPerStage, DescriptorType.StorageTexelBuffer, Constants.MaxImagesPerStage, 2);

                iter++;
            }

            DescriptorSetLayout[] layouts = new DescriptorSetLayout[PipelineFull.DescriptorSetLayouts];

            var uDescriptorSetLayoutCreateInfo = new DescriptorSetLayoutCreateInfo()
            {
                SType = StructureType.DescriptorSetLayoutCreateInfo,
                PBindings = uLayoutBindings,
                BindingCount = (uint)uCount,
                Flags = usePd ? DescriptorSetLayoutCreateFlags.DescriptorSetLayoutCreatePushDescriptorBitKhr : 0
            };

            var sDescriptorSetLayoutCreateInfo = new DescriptorSetLayoutCreateInfo()
            {
                SType = StructureType.DescriptorSetLayoutCreateInfo,
                PBindings = sLayoutBindings,
                BindingCount = (uint)stagesCount
            };

            var tDescriptorSetLayoutCreateInfo = new DescriptorSetLayoutCreateInfo()
            {
                SType = StructureType.DescriptorSetLayoutCreateInfo,
                PBindings = tLayoutBindings,
                BindingCount = (uint)tCount
            };

            var iDescriptorSetLayoutCreateInfo = new DescriptorSetLayoutCreateInfo()
            {
                SType = StructureType.DescriptorSetLayoutCreateInfo,
                PBindings = iLayoutBindings,
                BindingCount = (uint)iCount
            };

            gd.Api.CreateDescriptorSetLayout(device, uDescriptorSetLayoutCreateInfo, null, out layouts[PipelineFull.UniformSetIndex]).ThrowOnError();
            gd.Api.CreateDescriptorSetLayout(device, sDescriptorSetLayoutCreateInfo, null, out layouts[PipelineFull.StorageSetIndex]).ThrowOnError();
            gd.Api.CreateDescriptorSetLayout(device, tDescriptorSetLayoutCreateInfo, null, out layouts[PipelineFull.TextureSetIndex]).ThrowOnError();
            gd.Api.CreateDescriptorSetLayout(device, iDescriptorSetLayoutCreateInfo, null, out layouts[PipelineFull.ImageSetIndex]).ThrowOnError();

            fixed (DescriptorSetLayout* pLayouts = layouts)
            {
                var pipelineLayoutCreateInfo = new PipelineLayoutCreateInfo()
                {
                    SType = StructureType.PipelineLayoutCreateInfo,
                    PSetLayouts = pLayouts,
                    SetLayoutCount = PipelineFull.DescriptorSetLayouts
                };

                gd.Api.CreatePipelineLayout(device, &pipelineLayoutCreateInfo, null, out layout).ThrowOnError();
            }

            return layouts;
        }

        // Creates layouts containing only the bindings the given shaders declare.
        // Throws (via ThrowOnError) when a Vulkan create call fails.
        public static unsafe DescriptorSetLayout[] CreateMinimal(VulkanGraphicsDevice gd, Device device, ShaderSource[] shaders, out PipelineLayout layout)
        {
            int stagesCount = shaders.Length;

            int uCount = 0;
            int tCount = 0;
            int iCount = 0;

            foreach (var shader in shaders)
            {
                uCount += shader.Bindings.UniformBufferBindings.Count;
                tCount += shader.Bindings.TextureBindings.Count;
                iCount += shader.Bindings.ImageBindings.Count;
            }

            DescriptorSetLayoutBinding* uLayoutBindings = stackalloc DescriptorSetLayoutBinding[uCount];
            DescriptorSetLayoutBinding* sLayoutBindings = stackalloc DescriptorSetLayoutBinding[stagesCount];
            DescriptorSetLayoutBinding* tLayoutBindings = stackalloc DescriptorSetLayoutBinding[tCount];
            DescriptorSetLayoutBinding* iLayoutBindings = stackalloc DescriptorSetLayoutBinding[iCount];

            int uIndex = 0;
            int sIndex = 0;
            int tIndex = 0;
            int iIndex = 0;

            foreach (var shader in shaders)
            {
                var stageFlags = shader.Stage.Convert();

                // Appends one single-descriptor binding per declared binding number.
                void Set(DescriptorSetLayoutBinding* bindings, DescriptorType type, ref int start, IEnumerable<int> bds)
                {
                    foreach (var b in bds)
                    {
                        bindings[start++] = new DescriptorSetLayoutBinding
                        {
                            Binding = (uint)b,
                            DescriptorType = type,
                            DescriptorCount = 1,
                            StageFlags = stageFlags
                        };
                    }
                }

                // Appends one storage-buffer array binding for the shader.
                void SetStorage(DescriptorSetLayoutBinding* bindings, ref int start, int count)
                {
                    // NOTE(review): the indexer's start++ is evaluated before the initializer,
                    // so Binding receives the already-incremented value (1 for the first shader,
                    // not 0). Behavior is kept as-is — confirm that this is intended.
                    bindings[start++] = new DescriptorSetLayoutBinding
                    {
                        Binding = (uint)start,
                        DescriptorType = DescriptorType.StorageBuffer,
                        DescriptorCount = (uint)count,
                        StageFlags = stageFlags
                    };
                }

                // TODO: Support buffer textures and images here.
                // This is only used for the helper shaders on the backend, and we don't use buffer textures on them
                // so far, so it's not really necessary right now.
                Set(uLayoutBindings, DescriptorType.UniformBuffer, ref uIndex, shader.Bindings.UniformBufferBindings);
                SetStorage(sLayoutBindings, ref sIndex, shader.Bindings.StorageBufferBindings.Count);
                Set(tLayoutBindings, DescriptorType.CombinedImageSampler, ref tIndex, shader.Bindings.TextureBindings);
                Set(iLayoutBindings, DescriptorType.StorageImage, ref iIndex, shader.Bindings.ImageBindings);
            }

            DescriptorSetLayout[] layouts = new DescriptorSetLayout[PipelineFull.DescriptorSetLayouts];

            var uDescriptorSetLayoutCreateInfo = new DescriptorSetLayoutCreateInfo()
            {
                SType = StructureType.DescriptorSetLayoutCreateInfo,
                PBindings = uLayoutBindings,
                BindingCount = (uint)uCount
            };

            var sDescriptorSetLayoutCreateInfo = new DescriptorSetLayoutCreateInfo()
            {
                SType = StructureType.DescriptorSetLayoutCreateInfo,
                PBindings = sLayoutBindings,
                BindingCount = (uint)stagesCount
            };

            var tDescriptorSetLayoutCreateInfo = new DescriptorSetLayoutCreateInfo()
            {
                SType = StructureType.DescriptorSetLayoutCreateInfo,
                PBindings = tLayoutBindings,
                BindingCount = (uint)tCount
            };

            var iDescriptorSetLayoutCreateInfo = new DescriptorSetLayoutCreateInfo()
            {
                SType = StructureType.DescriptorSetLayoutCreateInfo,
                PBindings = iLayoutBindings,
                BindingCount = (uint)iCount
            };

            gd.Api.CreateDescriptorSetLayout(device, uDescriptorSetLayoutCreateInfo, null, out layouts[PipelineFull.UniformSetIndex]).ThrowOnError();
            gd.Api.CreateDescriptorSetLayout(device, sDescriptorSetLayoutCreateInfo, null, out layouts[PipelineFull.StorageSetIndex]).ThrowOnError();
            gd.Api.CreateDescriptorSetLayout(device, tDescriptorSetLayoutCreateInfo, null, out layouts[PipelineFull.TextureSetIndex]).ThrowOnError();
            gd.Api.CreateDescriptorSetLayout(device, iDescriptorSetLayoutCreateInfo, null, out layouts[PipelineFull.ImageSetIndex]).ThrowOnError();

            fixed (DescriptorSetLayout* pLayouts = layouts)
            {
                var pipelineLayoutCreateInfo = new PipelineLayoutCreateInfo()
                {
                    SType = StructureType.PipelineLayoutCreateInfo,
                    PSetLayouts = pLayouts,
                    SetLayoutCount = PipelineFull.DescriptorSetLayouts
                };

                gd.Api.CreatePipelineLayout(device, &pipelineLayoutCreateInfo, null, out layout).ThrowOnError();
            }

            return layouts;
        }
    }
}
diff --git a/Ryujinx.Graphics.Vulkan/PipelineState.cs b/Ryujinx.Graphics.Vulkan/PipelineState.cs
new file mode 100644
index 000000000..affa13dda
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/PipelineState.cs
@@ -0,0 +1,572 @@
using Silk.NET.Vulkan;
using System;

namespace Ryujinx.Graphics.Vulkan
{
    struct PipelineState : IDisposable
    {
        private const int RequiredSubgroupSize = 32;

        public PipelineUid Internal;

        public float LineWidth
        {
            get => BitConverter.Int32BitsToSingle((int)((Internal.Id0 >> 0) & 0xFFFFFFFF));
            set => Internal.Id0 = (Internal.Id0 & 0xFFFFFFFF00000000) | ((ulong)(uint)BitConverter.SingleToInt32Bits(value) << 0);
        }

        public float DepthBiasClamp
        {
            get => BitConverter.Int32BitsToSingle((int)((Internal.Id0 >> 32) & 0xFFFFFFFF));
            set => Internal.Id0 = (Internal.Id0 & 0xFFFFFFFF) | ((ulong)(uint)BitConverter.SingleToInt32Bits(value) << 32);
        }

        public float DepthBiasConstantFactor
        {
            get => BitConverter.Int32BitsToSingle((int)((Internal.Id1 >> 0) & 0xFFFFFFFF));
            set => Internal.Id1 = (Internal.Id1 & 0xFFFFFFFF00000000) | ((ulong)(uint)BitConverter.SingleToInt32Bits(value) << 0);
        }

        public float DepthBiasSlopeFactor
        {
            get => BitConverter.Int32BitsToSingle((int)((Internal.Id1 >> 32) & 0xFFFFFFFF));
            set => Internal.Id1 = (Internal.Id1 & 0xFFFFFFFF) | ((ulong)(uint)BitConverter.SingleToInt32Bits(value) << 32);
        }

        public uint StencilFrontCompareMask
        {
            get => (uint)((Internal.Id2 >> 0) & 0xFFFFFFFF);
            set => Internal.Id2 = (Internal.Id2 & 0xFFFFFFFF00000000) | ((ulong)value << 0);
        }

        public uint StencilFrontWriteMask
        {
            get => (uint)((Internal.Id2 >> 32) & 0xFFFFFFFF);
            set => Internal.Id2 = (Internal.Id2 & 0xFFFFFFFF) | ((ulong)value
<< 32); + } + + public uint StencilFrontReference + { + get => (uint)((Internal.Id3 >> 0) & 0xFFFFFFFF); + set => Internal.Id3 = (Internal.Id3 & 0xFFFFFFFF00000000) | ((ulong)value << 0); + } + + public uint StencilBackCompareMask + { + get => (uint)((Internal.Id3 >> 32) & 0xFFFFFFFF); + set => Internal.Id3 = (Internal.Id3 & 0xFFFFFFFF) | ((ulong)value << 32); + } + + public uint StencilBackWriteMask + { + get => (uint)((Internal.Id4 >> 0) & 0xFFFFFFFF); + set => Internal.Id4 = (Internal.Id4 & 0xFFFFFFFF00000000) | ((ulong)value << 0); + } + + public uint StencilBackReference + { + get => (uint)((Internal.Id4 >> 32) & 0xFFFFFFFF); + set => Internal.Id4 = (Internal.Id4 & 0xFFFFFFFF) | ((ulong)value << 32); + } + + public float MinDepthBounds + { + get => BitConverter.Int32BitsToSingle((int)((Internal.Id5 >> 0) & 0xFFFFFFFF)); + set => Internal.Id5 = (Internal.Id5 & 0xFFFFFFFF00000000) | ((ulong)(uint)BitConverter.SingleToInt32Bits(value) << 0); + } + + public float MaxDepthBounds + { + get => BitConverter.Int32BitsToSingle((int)((Internal.Id5 >> 32) & 0xFFFFFFFF)); + set => Internal.Id5 = (Internal.Id5 & 0xFFFFFFFF) | ((ulong)(uint)BitConverter.SingleToInt32Bits(value) << 32); + } + + public float BlendConstantR + { + get => BitConverter.Int32BitsToSingle((int)((Internal.Id6 >> 0) & 0xFFFFFFFF)); + set => Internal.Id6 = (Internal.Id6 & 0xFFFFFFFF00000000) | ((ulong)(uint)BitConverter.SingleToInt32Bits(value) << 0); + } + + public float BlendConstantG + { + get => BitConverter.Int32BitsToSingle((int)((Internal.Id6 >> 32) & 0xFFFFFFFF)); + set => Internal.Id6 = (Internal.Id6 & 0xFFFFFFFF) | ((ulong)(uint)BitConverter.SingleToInt32Bits(value) << 32); + } + + public float BlendConstantB + { + get => BitConverter.Int32BitsToSingle((int)((Internal.Id7 >> 0) & 0xFFFFFFFF)); + set => Internal.Id7 = (Internal.Id7 & 0xFFFFFFFF00000000) | ((ulong)(uint)BitConverter.SingleToInt32Bits(value) << 0); + } + + public float BlendConstantA + { + get => 
BitConverter.Int32BitsToSingle((int)((Internal.Id7 >> 32) & 0xFFFFFFFF)); + set => Internal.Id7 = (Internal.Id7 & 0xFFFFFFFF) | ((ulong)(uint)BitConverter.SingleToInt32Bits(value) << 32); + } + + public PolygonMode PolygonMode + { + get => (PolygonMode)((Internal.Id8 >> 0) & 0x3FFFFFFF); + set => Internal.Id8 = (Internal.Id8 & 0xFFFFFFFFC0000000) | ((ulong)value << 0); + } + + public uint StagesCount + { + get => (byte)((Internal.Id8 >> 30) & 0xFF); + set => Internal.Id8 = (Internal.Id8 & 0xFFFFFFC03FFFFFFF) | ((ulong)value << 30); + } + + public uint VertexAttributeDescriptionsCount + { + get => (byte)((Internal.Id8 >> 38) & 0xFF); + set => Internal.Id8 = (Internal.Id8 & 0xFFFFC03FFFFFFFFF) | ((ulong)value << 38); + } + + public uint VertexBindingDescriptionsCount + { + get => (byte)((Internal.Id8 >> 46) & 0xFF); + set => Internal.Id8 = (Internal.Id8 & 0xFFC03FFFFFFFFFFF) | ((ulong)value << 46); + } + + public uint ViewportsCount + { + get => (byte)((Internal.Id8 >> 54) & 0xFF); + set => Internal.Id8 = (Internal.Id8 & 0xC03FFFFFFFFFFFFF) | ((ulong)value << 54); + } + + public uint ScissorsCount + { + get => (byte)((Internal.Id9 >> 0) & 0xFF); + set => Internal.Id9 = (Internal.Id9 & 0xFFFFFFFFFFFFFF00) | ((ulong)value << 0); + } + + public uint ColorBlendAttachmentStateCount + { + get => (byte)((Internal.Id9 >> 8) & 0xFF); + set => Internal.Id9 = (Internal.Id9 & 0xFFFFFFFFFFFF00FF) | ((ulong)value << 8); + } + + public PrimitiveTopology Topology + { + get => (PrimitiveTopology)((Internal.Id9 >> 16) & 0xF); + set => Internal.Id9 = (Internal.Id9 & 0xFFFFFFFFFFF0FFFF) | ((ulong)value << 16); + } + + public LogicOp LogicOp + { + get => (LogicOp)((Internal.Id9 >> 20) & 0xF); + set => Internal.Id9 = (Internal.Id9 & 0xFFFFFFFFFF0FFFFF) | ((ulong)value << 20); + } + + public CompareOp DepthCompareOp + { + get => (CompareOp)((Internal.Id9 >> 24) & 0x7); + set => Internal.Id9 = (Internal.Id9 & 0xFFFFFFFFF8FFFFFF) | ((ulong)value << 24); + } + + public StencilOp 
StencilFrontFailOp + { + get => (StencilOp)((Internal.Id9 >> 27) & 0x7); + set => Internal.Id9 = (Internal.Id9 & 0xFFFFFFFFC7FFFFFF) | ((ulong)value << 27); + } + + public StencilOp StencilFrontPassOp + { + get => (StencilOp)((Internal.Id9 >> 30) & 0x7); + set => Internal.Id9 = (Internal.Id9 & 0xFFFFFFFE3FFFFFFF) | ((ulong)value << 30); + } + + public StencilOp StencilFrontDepthFailOp + { + get => (StencilOp)((Internal.Id9 >> 33) & 0x7); + set => Internal.Id9 = (Internal.Id9 & 0xFFFFFFF1FFFFFFFF) | ((ulong)value << 33); + } + + public CompareOp StencilFrontCompareOp + { + get => (CompareOp)((Internal.Id9 >> 36) & 0x7); + set => Internal.Id9 = (Internal.Id9 & 0xFFFFFF8FFFFFFFFF) | ((ulong)value << 36); + } + + public StencilOp StencilBackFailOp + { + get => (StencilOp)((Internal.Id9 >> 39) & 0x7); + set => Internal.Id9 = (Internal.Id9 & 0xFFFFFC7FFFFFFFFF) | ((ulong)value << 39); + } + + public StencilOp StencilBackPassOp + { + get => (StencilOp)((Internal.Id9 >> 42) & 0x7); + set => Internal.Id9 = (Internal.Id9 & 0xFFFFE3FFFFFFFFFF) | ((ulong)value << 42); + } + + public StencilOp StencilBackDepthFailOp + { + get => (StencilOp)((Internal.Id9 >> 45) & 0x7); + set => Internal.Id9 = (Internal.Id9 & 0xFFFF1FFFFFFFFFFF) | ((ulong)value << 45); + } + + public CompareOp StencilBackCompareOp + { + get => (CompareOp)((Internal.Id9 >> 48) & 0x7); + set => Internal.Id9 = (Internal.Id9 & 0xFFF8FFFFFFFFFFFF) | ((ulong)value << 48); + } + + public CullModeFlags CullMode + { + get => (CullModeFlags)((Internal.Id9 >> 51) & 0x3); + set => Internal.Id9 = (Internal.Id9 & 0xFFE7FFFFFFFFFFFF) | ((ulong)value << 51); + } + + public bool PrimitiveRestartEnable + { + get => ((Internal.Id9 >> 53) & 0x1) != 0UL; + set => Internal.Id9 = (Internal.Id9 & 0xFFDFFFFFFFFFFFFF) | ((value ? 1UL : 0UL) << 53); + } + + public bool DepthClampEnable + { + get => ((Internal.Id9 >> 54) & 0x1) != 0UL; + set => Internal.Id9 = (Internal.Id9 & 0xFFBFFFFFFFFFFFFF) | ((value ? 
1UL : 0UL) << 54); + } + + public bool RasterizerDiscardEnable + { + get => ((Internal.Id9 >> 55) & 0x1) != 0UL; + set => Internal.Id9 = (Internal.Id9 & 0xFF7FFFFFFFFFFFFF) | ((value ? 1UL : 0UL) << 55); + } + + public FrontFace FrontFace + { + get => (FrontFace)((Internal.Id9 >> 56) & 0x1); + set => Internal.Id9 = (Internal.Id9 & 0xFEFFFFFFFFFFFFFF) | ((ulong)value << 56); + } + + public bool DepthBiasEnable + { + get => ((Internal.Id9 >> 57) & 0x1) != 0UL; + set => Internal.Id9 = (Internal.Id9 & 0xFDFFFFFFFFFFFFFF) | ((value ? 1UL : 0UL) << 57); + } + + public bool DepthTestEnable + { + get => ((Internal.Id9 >> 58) & 0x1) != 0UL; + set => Internal.Id9 = (Internal.Id9 & 0xFBFFFFFFFFFFFFFF) | ((value ? 1UL : 0UL) << 58); + } + + public bool DepthWriteEnable + { + get => ((Internal.Id9 >> 59) & 0x1) != 0UL; + set => Internal.Id9 = (Internal.Id9 & 0xF7FFFFFFFFFFFFFF) | ((value ? 1UL : 0UL) << 59); + } + + public bool DepthBoundsTestEnable + { + get => ((Internal.Id9 >> 60) & 0x1) != 0UL; + set => Internal.Id9 = (Internal.Id9 & 0xEFFFFFFFFFFFFFFF) | ((value ? 1UL : 0UL) << 60); + } + + public bool StencilTestEnable + { + get => ((Internal.Id9 >> 61) & 0x1) != 0UL; + set => Internal.Id9 = (Internal.Id9 & 0xDFFFFFFFFFFFFFFF) | ((value ? 1UL : 0UL) << 61); + } + + public bool LogicOpEnable + { + get => ((Internal.Id9 >> 62) & 0x1) != 0UL; + set => Internal.Id9 = (Internal.Id9 & 0xBFFFFFFFFFFFFFFF) | ((value ? 1UL : 0UL) << 62); + } + + public bool HasDepthStencil + { + get => ((Internal.Id9 >> 63) & 0x1) != 0UL; + set => Internal.Id9 = (Internal.Id9 & 0x7FFFFFFFFFFFFFFF) | ((value ? 
/// <summary>
/// Returns the compute pipeline for <paramref name="program"/>, creating and caching it on first use.
/// </summary>
/// <param name="gd">Graphics device that exposes the Vulkan API and capability flags.</param>
/// <param name="device">Logical device the pipeline is created on.</param>
/// <param name="program">Shader collection that stores the cached compute pipeline.</param>
/// <param name="cache">Vulkan pipeline cache handed to pipeline creation.</param>
/// <returns>The cached or newly created pipeline wrapper.</returns>
public unsafe Auto<DisposablePipeline> CreateComputePipeline(
    VulkanGraphicsDevice gd,
    Device device,
    ShaderCollection program,
    PipelineCache cache)
{
    // Fast path: the program already owns a compute pipeline.
    if (program.TryGetComputePipeline(out var pipeline))
    {
        return pipeline;
    }

    // A compute pipeline has exactly one stage, so only stage 0 needs its pNext chain refreshed.
    if (gd.SupportsSubgroupSizeControl)
    {
        UpdateStageRequiredSubgroupSizes(gd, 1);
    }

    var createInfo = new ComputePipelineCreateInfo()
    {
        SType = StructureType.ComputePipelineCreateInfo,
        Stage = Stages[0],
        BasePipelineIndex = -1,
        Layout = PipelineLayout
    };

    Pipeline handle = default;

    gd.Api.CreateComputePipelines(device, cache, 1, &createInfo, null, &handle).ThrowOnError();

    pipeline = new Auto<DisposablePipeline>(new DisposablePipeline(gd.Api, device, handle));

    // Cache on the program so the next call takes the fast path above.
    program.AddComputePipeline(pipeline);

    return pipeline;
}
program, + PipelineCache cache, + RenderPass renderPass) + { + if (program.TryGetGraphicsPipeline(ref Internal, out var pipeline)) + { + return pipeline; + } + + Pipeline pipelineHandle = default; + + fixed (VertexInputAttributeDescription* pVertexAttributeDescriptions = &Internal.VertexAttributeDescriptions[0]) + fixed (VertexInputBindingDescription* pVertexBindingDescriptions = &Internal.VertexBindingDescriptions[0]) + fixed (Viewport* pViewports = &Internal.Viewports[0]) + fixed (Rect2D* pScissors = &Internal.Scissors[0]) + fixed (PipelineColorBlendAttachmentState* pColorBlendAttachmentState = &Internal.ColorBlendAttachmentState[0]) + { + var vertexInputState = new PipelineVertexInputStateCreateInfo + { + SType = StructureType.PipelineVertexInputStateCreateInfo, + VertexAttributeDescriptionCount = VertexAttributeDescriptionsCount, + PVertexAttributeDescriptions = pVertexAttributeDescriptions, + VertexBindingDescriptionCount = VertexBindingDescriptionsCount, + PVertexBindingDescriptions = pVertexBindingDescriptions + }; + + bool primitiveRestartEnable = PrimitiveRestartEnable; + + primitiveRestartEnable &= Topology == PrimitiveTopology.LineStrip || + Topology == PrimitiveTopology.TriangleStrip || + Topology == PrimitiveTopology.TriangleFan || + Topology == PrimitiveTopology.LineStripWithAdjacency || + Topology == PrimitiveTopology.TriangleStripWithAdjacency; + + var inputAssemblyState = new PipelineInputAssemblyStateCreateInfo() + { + SType = StructureType.PipelineInputAssemblyStateCreateInfo, + PrimitiveRestartEnable = primitiveRestartEnable, + Topology = Topology + }; + + var tessellationState = new PipelineTessellationStateCreateInfo() + { + SType = StructureType.PipelineTessellationStateCreateInfo, + PatchControlPoints = PatchControlPoints + }; + + var rasterizationState = new PipelineRasterizationStateCreateInfo() + { + SType = StructureType.PipelineRasterizationStateCreateInfo, + DepthClampEnable = DepthClampEnable, + RasterizerDiscardEnable = 
RasterizerDiscardEnable, + PolygonMode = PolygonMode, + LineWidth = LineWidth, + CullMode = CullMode, + FrontFace = FrontFace, + DepthBiasEnable = DepthBiasEnable, + DepthBiasClamp = DepthBiasClamp, + DepthBiasConstantFactor = DepthBiasConstantFactor, + DepthBiasSlopeFactor = DepthBiasSlopeFactor + }; + + var viewportState = new PipelineViewportStateCreateInfo() + { + SType = StructureType.PipelineViewportStateCreateInfo, + ViewportCount = ViewportsCount, + PViewports = pViewports, + ScissorCount = ScissorsCount, + PScissors = pScissors + }; + + var multisampleState = new PipelineMultisampleStateCreateInfo + { + SType = StructureType.PipelineMultisampleStateCreateInfo, + SampleShadingEnable = false, + RasterizationSamples = TextureStorage.ConvertToSampleCountFlags(SamplesCount), + MinSampleShading = 1 + }; + + var stencilFront = new StencilOpState( + StencilFrontFailOp, + StencilFrontPassOp, + StencilFrontDepthFailOp, + StencilFrontCompareOp, + StencilFrontCompareMask, + StencilFrontWriteMask, + StencilFrontReference); + + var stencilBack = new StencilOpState( + StencilBackFailOp, + StencilBackPassOp, + StencilBackDepthFailOp, + StencilBackCompareOp, + StencilBackCompareMask, + StencilBackWriteMask, + StencilBackReference); + + var depthStencilState = new PipelineDepthStencilStateCreateInfo() + { + SType = StructureType.PipelineDepthStencilStateCreateInfo, + DepthTestEnable = DepthTestEnable, + DepthWriteEnable = DepthWriteEnable, + DepthCompareOp = DepthCompareOp, + DepthBoundsTestEnable = DepthBoundsTestEnable, + StencilTestEnable = StencilTestEnable, + Front = stencilFront, + Back = stencilBack, + MinDepthBounds = MinDepthBounds, + MaxDepthBounds = MaxDepthBounds + }; + + var colorBlendState = new PipelineColorBlendStateCreateInfo() + { + SType = StructureType.PipelineColorBlendStateCreateInfo, + LogicOpEnable = LogicOpEnable, + LogicOp = LogicOp, + AttachmentCount = ColorBlendAttachmentStateCount, + PAttachments = pColorBlendAttachmentState + }; + + 
colorBlendState.BlendConstants[0] = BlendConstantR; + colorBlendState.BlendConstants[1] = BlendConstantG; + colorBlendState.BlendConstants[2] = BlendConstantB; + colorBlendState.BlendConstants[3] = BlendConstantA; + + PipelineDynamicStateCreateInfo* pDynamicState = null; + + if (VulkanConfiguration.UseDynamicState) + { + bool supportsExtDynamicState = gd.Capabilities.SupportsExtendedDynamicState; + int dynamicStatesCount = supportsExtDynamicState ? 8 : 7; + + DynamicState* dynamicStates = stackalloc DynamicState[dynamicStatesCount]; + + dynamicStates[0] = DynamicState.Viewport; + dynamicStates[1] = DynamicState.Scissor; + dynamicStates[2] = DynamicState.DepthBias; + dynamicStates[3] = DynamicState.DepthBounds; + dynamicStates[4] = DynamicState.StencilCompareMask; + dynamicStates[5] = DynamicState.StencilWriteMask; + dynamicStates[6] = DynamicState.StencilReference; + + if (supportsExtDynamicState) + { + dynamicStates[7] = DynamicState.VertexInputBindingStrideExt; + } + + var pipelineDynamicStateCreateInfo = new PipelineDynamicStateCreateInfo() + { + SType = StructureType.PipelineDynamicStateCreateInfo, + DynamicStateCount = (uint)dynamicStatesCount, + PDynamicStates = dynamicStates + }; + + pDynamicState = &pipelineDynamicStateCreateInfo; + } + + if (gd.SupportsSubgroupSizeControl) + { + UpdateStageRequiredSubgroupSizes(gd, (int)StagesCount); + } + + var pipelineCreateInfo = new GraphicsPipelineCreateInfo() + { + SType = StructureType.GraphicsPipelineCreateInfo, + StageCount = StagesCount, + PStages = Stages.Pointer, + PVertexInputState = &vertexInputState, + PInputAssemblyState = &inputAssemblyState, + PTessellationState = &tessellationState, + PViewportState = &viewportState, + PRasterizationState = &rasterizationState, + PMultisampleState = &multisampleState, + PDepthStencilState = &depthStencilState, + PColorBlendState = &colorBlendState, + PDynamicState = pDynamicState, + Layout = PipelineLayout, + RenderPass = renderPass, + BasePipelineIndex = -1 + }; + + 
/// <summary>
/// Value key identifying a graphics pipeline state. The <c>Id*</c> words hold the bit-packed
/// fixed-function state written through <c>PipelineState</c>; the arrays hold the variable-length
/// parts, of which only the first <c>*Count</c> entries are significant.
/// </summary>
struct PipelineUid : IRefEquatable<PipelineUid>
{
    public ulong Id0;
    public ulong Id1;
    public ulong Id2;
    public ulong Id3;

    public ulong Id4;
    public ulong Id5;
    public ulong Id6;
    public ulong Id7;

    public ulong Id8;
    public ulong Id9;
    public ulong Id10;
    // Rounds the last group up to four words so Equals can compare it as one 256-bit vector.
    public ulong Padding;

    // Counts decoded from the packed words; bit positions mirror the PipelineState accessors.
    private uint VertexAttributeDescriptionsCount => (byte)((Id8 >> 38) & 0xFF);
    private uint VertexBindingDescriptionsCount => (byte)((Id8 >> 46) & 0xFF);
    private uint ViewportsCount => (byte)((Id8 >> 54) & 0xFF);
    private uint ScissorsCount => (byte)((Id9 >> 0) & 0xFF);
    private uint ColorBlendAttachmentStateCount => (byte)((Id9 >> 8) & 0xFF);
    private bool HasDepthStencil => ((Id9 >> 63) & 0x1) != 0UL;

    public Array32<VertexInputAttributeDescription> VertexAttributeDescriptions;
    public Array33<VertexInputBindingDescription> VertexBindingDescriptions;
    public Array16<Viewport> Viewports;
    public Array16<Rect2D> Scissors;
    public Array8<PipelineColorBlendAttachmentState> ColorBlendAttachmentState;
    // NOTE(review): element type assumed to be Silk.NET.Vulkan.Format (values are cast to uint below) - confirm.
    public Array9<Format> AttachmentFormats;

    /// <summary>
    /// Boxed equality; forwards to <see cref="Equals(ref PipelineUid)"/>.
    /// </summary>
    public override bool Equals(object obj)
    {
        // "ref" is required here: without it the call can only bind back to this
        // Equals(object) overload (via boxing) and recurses until the stack overflows.
        return obj is PipelineUid other && Equals(ref other);
    }

    /// <summary>
    /// Compares the packed state words and the significant prefix of each state array.
    /// </summary>
    /// <param name="other">Key to compare against.</param>
    /// <returns>True when both keys describe the same pipeline state.</returns>
    public bool Equals(ref PipelineUid other)
    {
        // Twelve words compared as three 256-bit groups: Id0-3, Id4-7, Id8-Padding.
        if (!Unsafe.As<ulong, Vector256<byte>>(ref Id0).Equals(Unsafe.As<ulong, Vector256<byte>>(ref other.Id0)) ||
            !Unsafe.As<ulong, Vector256<byte>>(ref Id4).Equals(Unsafe.As<ulong, Vector256<byte>>(ref other.Id4)) ||
            !Unsafe.As<ulong, Vector256<byte>>(ref Id8).Equals(Unsafe.As<ulong, Vector256<byte>>(ref other.Id8)))
        {
            return false;
        }

        // The counts live in Id8/Id9, which are known to match at this point.
        if (!SequenceEqual<VertexInputAttributeDescription>(VertexAttributeDescriptions.ToSpan(), other.VertexAttributeDescriptions.ToSpan(), VertexAttributeDescriptionsCount))
        {
            return false;
        }

        if (!SequenceEqual<VertexInputBindingDescription>(VertexBindingDescriptions.ToSpan(), other.VertexBindingDescriptions.ToSpan(), VertexBindingDescriptionsCount))
        {
            return false;
        }

        // Viewport and scissor contents only take part in the key when dynamic state is disabled.
        if (!VulkanConfiguration.UseDynamicState)
        {
            if (!SequenceEqual<Viewport>(Viewports.ToSpan(), other.Viewports.ToSpan(), ViewportsCount))
            {
                return false;
            }

            if (!SequenceEqual<Rect2D>(Scissors.ToSpan(), other.Scissors.ToSpan(), ScissorsCount))
            {
                return false;
            }
        }

        if (!SequenceEqual<PipelineColorBlendAttachmentState>(ColorBlendAttachmentState.ToSpan(), other.ColorBlendAttachmentState.ToSpan(), ColorBlendAttachmentStateCount))
        {
            return false;
        }

        // One format per colour attachment, plus one more when a depth-stencil attachment is present.
        if (!SequenceEqual<Format>(AttachmentFormats.ToSpan(), other.AttachmentFormats.ToSpan(), ColorBlendAttachmentStateCount + (HasDepthStencil ? 1u : 0u)))
        {
            return false;
        }

        return true;
    }

    // Byte-wise comparison of the first "count" elements of two spans.
    private static bool SequenceEqual<T>(ReadOnlySpan<T> x, ReadOnlySpan<T> y, uint count) where T : unmanaged
    {
        return MemoryMarshal.Cast<T, byte>(x.Slice(0, (int)count)).SequenceEqual(MemoryMarshal.Cast<T, byte>(y.Slice(0, (int)count)));
    }

    /// <summary>
    /// Hashes the packed words and the significant entries of the vertex, blend and format arrays.
    /// Viewports, scissors and the depth-stencil format are not mixed in; this only affects
    /// distribution, since keys that compare equal still hash equal.
    /// </summary>
    public override int GetHashCode()
    {
        ulong hash64 = Id0 * 23 ^
                       Id1 * 23 ^
                       Id2 * 23 ^
                       Id3 * 23 ^
                       Id4 * 23 ^
                       Id5 * 23 ^
                       Id6 * 23 ^
                       Id7 * 23 ^
                       Id8 * 23 ^
                       Id9 * 23 ^
                       Id10 * 23;

        for (int i = 0; i < (int)VertexAttributeDescriptionsCount; i++)
        {
            hash64 ^= VertexAttributeDescriptions[i].Binding * 23;
            hash64 ^= (uint)VertexAttributeDescriptions[i].Format * 23;
            hash64 ^= VertexAttributeDescriptions[i].Location * 23;
            hash64 ^= VertexAttributeDescriptions[i].Offset * 23;
        }

        for (int i = 0; i < (int)VertexBindingDescriptionsCount; i++)
        {
            hash64 ^= VertexBindingDescriptions[i].Binding * 23;
            hash64 ^= (uint)VertexBindingDescriptions[i].InputRate * 23;
            hash64 ^= VertexBindingDescriptions[i].Stride * 23;
        }

        for (int i = 0; i < (int)ColorBlendAttachmentStateCount; i++)
        {
            hash64 ^= ColorBlendAttachmentState[i].BlendEnable * 23;
            hash64 ^= (uint)ColorBlendAttachmentState[i].SrcColorBlendFactor * 23;
            hash64 ^= (uint)ColorBlendAttachmentState[i].DstColorBlendFactor * 23;
            hash64 ^= (uint)ColorBlendAttachmentState[i].ColorBlendOp * 23;
            hash64 ^= (uint)ColorBlendAttachmentState[i].SrcAlphaBlendFactor * 23;
            hash64 ^= (uint)ColorBlendAttachmentState[i].DstAlphaBlendFactor * 23;
            hash64 ^= (uint)ColorBlendAttachmentState[i].AlphaBlendOp * 23;
            hash64 ^= (uint)ColorBlendAttachmentState[i].ColorWriteMask * 23;
        }

        for (int i = 0; i < (int)ColorBlendAttachmentStateCount; i++)
        {
            hash64 ^= (uint)AttachmentFormats[i] * 23;
        }

        // Fold the 64-bit accumulator down to 32 bits.
        return (int)hash64 ^ ((int)(hash64 >> 32) * 17);
    }
}
/// <summary>
/// A single Vulkan query whose result is copied into a small mapped buffer, where the CPU polls
/// for it. The buffer holds a sentinel value until a result has been written.
/// </summary>
class BufferedQuery : IDisposable
{
    private const int MaxQueryRetries = 5000;
    // Sentinels meaning "no result yet"; the second is used when only 32 bits of result are copied.
    private const long DefaultValue = -1;
    private const long DefaultValueInt = 0xFFFFFFFF;

    private readonly Vk _api;
    private readonly Device _device;
    private readonly PipelineFull _pipeline;

    private QueryPool _queryPool;
    private bool _isReset;

    private readonly BufferHolder _buffer;
    private readonly IntPtr _bufferMap;
    private readonly CounterType _type;
    private readonly bool _result32Bit;
    private readonly bool _isSupported;

    private readonly long _defaultValue;

    /// <summary>
    /// Creates the query pool (when the counter type is supported) and the mapped result buffer.
    /// </summary>
    /// <param name="gd">Graphics device providing the API, capabilities and buffer manager.</param>
    /// <param name="device">Logical device that owns the query pool.</param>
    /// <param name="pipeline">Pipeline used to record query commands.</param>
    /// <param name="type">Counter type this query measures.</param>
    /// <param name="result32Bit">True to copy a 32-bit result instead of a 64-bit one.</param>
    public unsafe BufferedQuery(VulkanGraphicsDevice gd, Device device, PipelineFull pipeline, CounterType type, bool result32Bit)
    {
        _api = gd.Api;
        _device = device;
        _pipeline = pipeline;
        _type = type;
        _result32Bit = result32Bit;

        _isSupported = QueryTypeSupported(gd, type);

        if (_isSupported)
        {
            // Only the pipeline-statistics counter requests a statistic bit.
            QueryPipelineStatisticFlags flags = type == CounterType.PrimitivesGenerated ?
                QueryPipelineStatisticFlags.QueryPipelineStatisticGeometryShaderPrimitivesBit : 0;

            var queryPoolCreateInfo = new QueryPoolCreateInfo()
            {
                SType = StructureType.QueryPoolCreateInfo,
                QueryCount = 1,
                QueryType = GetQueryType(type),
                PipelineStatistics = flags
            };

            gd.Api.CreateQueryPool(device, queryPoolCreateInfo, null, out _queryPool).ThrowOnError();
        }

        var buffer = gd.BufferManager.Create(gd, sizeof(long), forConditionalRendering: true);

        _bufferMap = buffer.Map(0, sizeof(long));
        _defaultValue = result32Bit ? DefaultValueInt : DefaultValue;
        // Seed the buffer with the sentinel so readers can tell "not written yet" from a real result.
        Marshal.WriteInt64(_bufferMap, _defaultValue);
        _buffer = buffer;
    }

    // Whether the device can service a query of the given counter type.
    private static bool QueryTypeSupported(VulkanGraphicsDevice gd, CounterType type)
    {
        return type switch
        {
            CounterType.SamplesPassed => true,
            CounterType.TransformFeedbackPrimitivesWritten => gd.Capabilities.SupportsTransformFeedbackQueries,
            CounterType.PrimitivesGenerated => gd.Capabilities.SupportsGeometryShader,
            _ => false
        };
    }

    // Maps a counter type to the Vulkan query type; unknown types fall back to occlusion.
    private static QueryType GetQueryType(CounterType type)
    {
        return type switch
        {
            CounterType.SamplesPassed => QueryType.Occlusion,
            CounterType.PrimitivesGenerated => QueryType.PipelineStatistics,
            CounterType.TransformFeedbackPrimitivesWritten => QueryType.TransformFeedbackStreamExt,
            _ => QueryType.Occlusion
        };
    }

    /// <summary>Returns the buffer that receives the query result.</summary>
    public Auto<DisposableBuffer> GetBuffer()
    {
        return _buffer.GetBuffer();
    }

    /// <summary>Ends the query without requesting a result, then begins it again.</summary>
    public void Reset()
    {
        End(false);
        Begin();
    }

    /// <summary>Begins the query on the pipeline when the counter type is supported.</summary>
    public void Begin()
    {
        if (_isSupported)
        {
            // The last argument is true when PoolReset has not run since the previous Begin.
            _pipeline.BeginQuery(this, _queryPool, !_isReset);
        }
        _isReset = false;
    }

    /// <summary>
    /// Ends the query. With a result requested (and supported) the sentinel is re-armed and a copy
    /// of the result is requested from the pipeline; otherwise zero is written as a dummy result.
    /// </summary>
    /// <param name="withResult">True when the caller needs the real counter value.</param>
    public unsafe void End(bool withResult)
    {
        if (_isSupported)
        {
            _pipeline.EndQuery(_queryPool);
        }

        if (withResult && _isSupported)
        {
            Marshal.WriteInt64(_bufferMap, _defaultValue);
            _pipeline.CopyQueryResults(this);
        }
        else
        {
            // Dummy result, just return 0.
            Marshal.WriteInt64(_bufferMap, 0);
        }
    }

    /// <summary>Reads the result buffer once without waiting.</summary>
    /// <param name="result">The raw buffer value; meaningful only when the method returns true.</param>
    /// <returns>True when the buffer no longer holds the sentinel.</returns>
    public bool TryGetResult(out long result)
    {
        result = Marshal.ReadInt64(_bufferMap);

        return result != _defaultValue;
    }

    /// <summary>
    /// Waits for the result to appear in the buffer.
    /// </summary>
    /// <param name="wakeSignal">
    /// When null, spins until a result is available. Otherwise polls at most
    /// <see cref="MaxQueryRetries"/> times, waiting up to 1 ms on the signal between polls.
    /// </param>
    /// <returns>The result, or the sentinel value if the bounded wait timed out.</returns>
    public long AwaitResult(AutoResetEvent wakeSignal = null)
    {
        long data = _defaultValue;

        if (wakeSignal == null)
        {
            while (data == _defaultValue)
            {
                data = Marshal.ReadInt64(_bufferMap);
            }
        }
        else
        {
            for (int attempt = 0; attempt < MaxQueryRetries; attempt++)
            {
                data = Marshal.ReadInt64(_bufferMap);
                if (data != _defaultValue)
                {
                    break;
                }

                wakeSignal.WaitOne(1);
            }

            // Decide on the value itself rather than on the attempt counter, so a result that
            // arrives on the final poll is not reported as a timeout.
            if (data == _defaultValue)
            {
                Logger.Error?.Print(LogClass.Gpu, $"Error: Query result {_type} timed out. Took more than {MaxQueryRetries} tries.");
            }
        }

        return data;
    }

    /// <summary>Records a reset of the query pool into <paramref name="cmd"/> and marks the query as reset.</summary>
    public void PoolReset(CommandBuffer cmd)
    {
        if (_isSupported)
        {
            _api.CmdResetQueryPool(cmd, _queryPool, 0, 1);
        }
        _isReset = true;
    }

    /// <summary>Records a copy of the query result into the result buffer.</summary>
    public void PoolCopy(CommandBufferScoped cbs)
    {
        var buffer = _buffer.GetBuffer(cbs.CommandBuffer, true).Get(cbs, 0, sizeof(long)).Value;

        QueryResultFlags flags = QueryResultFlags.QueryResultWaitBit;

        if (!_result32Bit)
        {
            flags |= QueryResultFlags.QueryResult64Bit;
        }

        _api.CmdCopyQueryPoolResults(
            cbs.CommandBuffer,
            _queryPool,
            0,
            1,
            buffer,
            0,
            (ulong)(_result32Bit ? sizeof(int) : sizeof(long)),
            flags);
    }

    /// <summary>Releases the result buffer and, when one was created, the query pool.</summary>
    public unsafe void Dispose()
    {
        _buffer.Dispose();
        if (_isSupported)
        {
            _api.DestroyQueryPool(_device, _queryPool, null);
        }
        _queryPool = default;
    }
}
// Consumer-thread loop: takes queued counter events one at a time and consumes them
// until the queue is marked as disposed.
private void EventConsumer()
{
    while (!Disposed)
    {
        CounterQueueEvent next;

        lock (_lock)
        {
            next = _events.Count > 0 ? _events.Dequeue() : null;
        }

        if (next != null)
        {
            // Spin-wait rather than sleeping if there are any waiters, by passing null instead of the wake signal.
            AutoResetEvent signal = _waiterCount == 0 ? _wakeSignal : null;

            next.TryConsume(ref _accumulatedCounter, true, signal);
        }
        else
        {
            // No more events to go through, wait for more.
            _queuedEvent.WaitOne();
        }

        // Let any thread blocked in FlushTo re-check its event.
        if (_waiterCount > 0)
        {
            _eventConsumed.Set();
        }
    }
}
/// <summary>
/// Flushes the queued counter events.
/// </summary>
/// <param name="blocking">
/// True to consume every queued event on the calling thread; false to only wake the consumer thread.
/// </param>
public void Flush(bool blocking)
{
    if (blocking)
    {
        lock (_lock)
        {
            // Tell the queue to process all events, oldest first.
            while (_events.Count > 0)
            {
                CounterQueueEvent head = _events.Peek();

                bool consumed = head.TryConsume(ref _accumulatedCounter, true);

                if (!consumed)
                {
                    // Stop at the first event that could not be consumed; it stays queued.
                    return;
                }

                _events.Dequeue();
            }
        }
    }
    else
    {
        // Just wake the consumer thread - it will update the queries.
        _wakeSignal.Set();
    }
}
// Tears the queue down: disposes everything still queued, stops the consumer thread,
// then releases the current event, the pooled queries and the wait handles.
public void Dispose()
{
    lock (_lock)
    {
        while (_events.Count > 0)
        {
            _events.Dequeue().Dispose();
        }

        // Set under the lock so the consumer loop sees an empty queue together with the flag.
        Disposed = true;
    }

    // Wake the consumer in case it is parked waiting for new events, then wait for it to exit.
    _queuedEvent.Set();
    _consumerThread.Join();

    _current?.Dispose();

    foreach (BufferedQuery pooled in _queryPool)
    {
        pooled.Dispose();
    }

    _queuedEvent.Dispose();
    _wakeSignal.Dispose();
    _eventConsumed.Dispose();
}
// Creates an event for the given queue, takes a query object from the queue's pool
// and begins the query immediately.
public CounterQueueEvent(CounterQueue queue, CounterType type, ulong drawIndex)
{
    Type = type;
    DrawIndex = drawIndex;

    _queue = queue;
    _counter = queue.GetQueryObject();

    _counter.Begin();
}

// Flags this event as one that clears the accumulated counter when consumed.
// The underlying query is additionally reset when counterReset is set.
internal void Clear(bool counterReset)
{
    if (counterReset)
    {
        _counter.Reset();
    }

    ClearCounter = true;
}

// Ends the underlying query; withResult is forwarded to the query unchanged.
internal void Complete(bool withResult) => _counter.End(withResult);

// Attempts to fold this event's query result into the running total in `result`.
// Returns true when the event was consumed (or had already been disposed), and false only
// when block is false and the query result is not available yet.
internal bool TryConsume(ref ulong result, bool block, AutoResetEvent wakeSignal = null)
{
    lock (_lock)
    {
        if (Disposed)
        {
            return true;
        }

        if (ClearCounter)
        {
            // A clear restarts the accumulated total before this event's value is added.
            result = 0;
        }

        long value;

        if (!block)
        {
            if (!_counter.TryGetResult(out value))
            {
                return false;
            }
        }
        else
        {
            value = _counter.AwaitResult(wakeSignal);
        }

        result += (ulong)value;
        _result = result;

        OnResult?.Invoke(this, result);

        // Return our resources to the pool.
        Dispose();

        return true;
    }
}
// Drops one reference; the query object goes back to the queue once the count reaches zero.
public void DecrementRefCount()
{
    int remaining = Interlocked.Decrement(ref _refCount);

    if (remaining == 0)
    {
        DisposeInternal();
    }
}

// Tries to take an extra reference on behalf of host access.
// Returns true when the reservation is held (including when it already was), false when the
// value is already available or the event's references have already been released.
public bool ReserveForHostAccess()
{
    if (_hostAccessReserved)
    {
        return true;
    }

    if (IsValueAvailable())
    {
        return false;
    }

    // An incremented count of 1 means the count had already dropped to 0; undo and refuse.
    bool alreadyReleased = Interlocked.Increment(ref _refCount) == 1;

    if (alreadyReleased)
    {
        Interlocked.Decrement(ref _refCount);

        return false;
    }

    _hostAccessReserved = true;

    return true;
}

// Clears the host access flag and drops one reference.
public void ReleaseHostAccess()
{
    _hostAccessReserved = false;

    DecrementRefCount();
}

// Gives the query object back to the owning queue.
private void DisposeInternal() => _queue.ReturnQueryObject(_counter);

// True once a result has been stored by TryConsume, or when the query reports one is ready.
private bool IsValueAvailable()
{
    if (_result != ulong.MaxValue)
    {
        return true;
    }

    return _counter.TryGetResult(out _);
}

// Marks the event as disposed and drops one reference.
public void Dispose()
{
    Disposed = true;

    DecrementRefCount();
}
// Requests a non-blocking flush on every counter queue.
public void Update()
{
    for (int i = 0; i < _counterQueues.Length; i++)
    {
        _counterQueues[i].Flush(false);
    }
}

// Requests a blocking flush on the queue that belongs to the given counter type.
public void Flush(CounterType type)
{
    CounterQueue target = _counterQueues[(int)type];

    target.Flush(true);
}

// Disposes every counter queue.
public void Dispose()
{
    for (int i = 0; i < _counterQueues.Length; i++)
    {
        _counterQueues[i].Dispose();
    }
}
// Maps an arbitrary border color onto one of the three fixed float border colors.
// cantConstrain is set to true when the color matches none of them exactly; in that case
// FloatOpaqueBlack is returned as the stand-in value.
private static BorderColor GetConstrainedBorderColor(ColorF arbitraryBorderColor, out bool cantConstrain)
{
    float red = arbitraryBorderColor.Red;
    float green = arbitraryBorderColor.Green;
    float blue = arbitraryBorderColor.Blue;
    float alpha = arbitraryBorderColor.Alpha;

    bool blackRgb = red == 0f && green == 0f && blue == 0f;
    bool opaqueWhite = red == 1f && green == 1f && blue == 1f && alpha == 1f;

    if (blackRgb && alpha == 1f)
    {
        cantConstrain = false;
        return BorderColor.FloatOpaqueBlack;
    }

    if (blackRgb && alpha == 0f)
    {
        cantConstrain = false;
        return BorderColor.FloatTransparentBlack;
    }

    if (opaqueWhite)
    {
        cantConstrain = false;
        return BorderColor.FloatOpaqueWhite;
    }

    cantConstrain = true;
    return BorderColor.FloatOpaqueBlack;
}

// Unregisters this holder from the device's sampler set; the sampler itself is only
// disposed when the holder was still registered.
public void Dispose()
{
    if (!_gd.Samplers.Remove(this))
    {
        return;
    }

    _sampler.Dispose();
}
// Reference-counted owner of a Vulkan semaphore. The semaphore is destroyed when the
// reference count drops to zero.
class SemaphoreHolder : IDisposable
{
    private readonly Vk _api;
    private readonly Device _device;
    private VkSemaphore _semaphore;

    // Starts at one: the reference that Dispose releases.
    private int _referenceCount = 1;
    public bool _disposed;

    // Creates the semaphore on the given device; throws if creation fails.
    public unsafe SemaphoreHolder(Vk api, Device device)
    {
        _api = api;
        _device = device;

        var createInfo = new SemaphoreCreateInfo()
        {
            SType = StructureType.SemaphoreCreateInfo
        };

        api.CreateSemaphore(device, in createInfo, null, out _semaphore).ThrowOnError();
    }

    // Returns the semaphore without touching the reference count.
    public VkSemaphore GetUnsafe() => _semaphore;

    // Returns the semaphore and takes a reference; pair every call with Put.
    public VkSemaphore Get()
    {
        Interlocked.Increment(ref _referenceCount);

        return _semaphore;
    }

    // Releases one reference, destroying the semaphore when it was the last one.
    public unsafe void Put()
    {
        if (Interlocked.Decrement(ref _referenceCount) != 0)
        {
            return;
        }

        _api.DestroySemaphore(_device, _semaphore, null);
        _semaphore = default;
    }

    // Releases the initial reference once; later calls do nothing.
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        Put();
        _disposed = true;
    }
}
// Captures the shader's stage and bindings, then starts a background task that obtains
// SPIR-V (the supplied binary, or GLSL compiled through GlslToSpirv) and creates the module.
// CompileStatus stays Incomplete until that task sets it to Success or Failure.
public unsafe Shader(Vk api, Device device, ShaderSource shaderSource)
{
    _api = api;
    _device = device;

    Bindings = shaderSource.Bindings;
    CompileStatus = ProgramLinkStatus.Incomplete;

    _stage = shaderSource.Stage.Convert();

    // Unmanaged copy of the entry point name; GetInfo hands this pointer out.
    _entryPointName = Marshal.StringToHGlobalAnsi("main");

    CompileTask = Task.Run(() =>
    {
        // Prefer the provided binary; only compile from GLSL when there is none.
        byte[] code = shaderSource.BinaryCode ?? GlslToSpirv(shaderSource.Code, shaderSource.Stage);

        if (code == null)
        {
            CompileStatus = ProgramLinkStatus.Failure;

            return;
        }

        fixed (byte* codePtr = code)
        {
            var moduleInfo = new ShaderModuleCreateInfo()
            {
                SType = StructureType.ShaderModuleCreateInfo,
                CodeSize = (uint)code.Length,
                PCode = (uint*)codePtr
            };

            api.CreateShaderModule(device, moduleInfo, null, out _module).ThrowOnError();
        }

        CompileStatus = ProgramLinkStatus.Success;
    });
}
// Translates a shader stage into the matching shaderc GLSL shader kind.
// Unrecognized values are logged at debug level and treated as a vertex shader.
private static ShaderKind GetShaderCShaderStage(ShaderStage stage)
{
    return stage switch
    {
        ShaderStage.Vertex => ShaderKind.GlslVertexShader,
        ShaderStage.Geometry => ShaderKind.GlslGeometryShader,
        ShaderStage.TessellationControl => ShaderKind.GlslTessControlShader,
        ShaderStage.TessellationEvaluation => ShaderKind.GlslTessEvaluationShader,
        ShaderStage.Fragment => ShaderKind.GlslFragmentShader,
        ShaderStage.Compute => ShaderKind.GlslComputeShader,
        _ => Fallback()
    };

    ShaderKind Fallback()
    {
        Logger.Debug?.Print(LogClass.Gpu, $"Invalid {nameof(ShaderStage)} enum value: {stage}.");

        return ShaderKind.GlslVertexShader;
    }
}

// Builds the pipeline stage description for this shader from its stage flags, its module
// and the unmanaged entry point name.
public unsafe PipelineShaderStageCreateInfo GetInfo()
{
    var stageInfo = new PipelineShaderStageCreateInfo();

    stageInfo.SType = StructureType.PipelineShaderStageCreateInfo;
    stageInfo.Stage = _stage;
    stageInfo.Module = _module;
    stageInfo.PName = (byte*)_entryPointName;

    return stageInfo;
}

// Blocks the caller until the background compile task has finished.
public void WaitForCompile() => CompileTask.Wait();
// True when the program's link status is Success. While the status is still Incomplete,
// a blocking link check runs first; its return value is not used here, only LinkStatus
// as it stands afterwards.
public bool IsLinked
{
    get
    {
        bool stillPending = LinkStatus == ProgramLinkStatus.Incomplete;

        if (stillPending)
        {
            CheckProgramLink(true);
        }

        return LinkStatus == ProgramLinkStatus.Success;
    }
}
ProgramLinkStatus.Incomplete; + + uint stages = 0; + + for (int i = 0; i < shaders.Length; i++) + { + var shader = new Shader(gd.Api, device, shaders[i]); + + stages |= 1u << shader.StageFlags switch + { + ShaderStageFlags.ShaderStageFragmentBit => 1, + ShaderStageFlags.ShaderStageGeometryBit => 2, + ShaderStageFlags.ShaderStageTessellationControlBit => 3, + ShaderStageFlags.ShaderStageTessellationEvaluationBit => 4, + _ => 0 + }; + + if (shader.StageFlags == ShaderStageFlags.ShaderStageComputeBit) + { + _isCompute = true; + } + + internalShaders[i] = shader; + } + + _shaders = internalShaders; + + bool usePd = !isMinimal && VulkanConfiguration.UsePushDescriptors && _gd.Capabilities.SupportsPushDescriptors; + + _plce = isMinimal + ? gd.PipelineLayoutCache.Create(gd, device, shaders) + : gd.PipelineLayoutCache.GetOrCreate(gd, device, stages, usePd); + + HasMinimalLayout = isMinimal; + UsePushDescriptors = usePd; + + Stages = stages; + + int[][] GrabAll(Func> selector) + { + bool hasAny = false; + int[][] bindings = new int[internalShaders.Length][]; + + for (int i = 0; i < internalShaders.Length; i++) + { + var collection = selector(internalShaders[i].Bindings); + hasAny |= collection.Count != 0; + bindings[i] = collection.ToArray(); + } + + return hasAny ? 
// Creates a program that also builds a pipeline in the background from the supplied state.
// A program that did not come from the cache is flagged so that the first graphics pipeline
// lookup can warn about a missed background compile.
public ShaderCollection(
    VulkanGraphicsDevice gd,
    Device device,
    ShaderSource[] sources,
    ProgramPipelineState state,
    bool fromCache) : this(gd, device, sources)
{
    // The state must be stored before the background work starts, since that work reads it.
    _state = state;

    _compileTask = BackgroundCompilation();
    _firstBackgroundUse = !fromCache;
}

// Waits for every shader's compile task, then creates a compute or graphics pipeline.
// LinkStatus becomes Failure when any shader failed to compile or when pipeline creation
// throws a VulkanException; it is left untouched otherwise.
private async Task BackgroundCompilation()
{
    var pending = new Task[_shaders.Length];

    for (int i = 0; i < pending.Length; i++)
    {
        pending[i] = _shaders[i].CompileTask;
    }

    await Task.WhenAll(pending);

    foreach (Shader shader in _shaders)
    {
        if (shader.CompileStatus == ProgramLinkStatus.Failure)
        {
            LinkStatus = ProgramLinkStatus.Failure;

            return;
        }
    }

    try
    {
        if (!_isCompute)
        {
            CreateBackgroundGraphicsPipeline();
        }
        else
        {
            CreateBackgroundComputePipeline();
        }
    }
    catch (VulkanException e)
    {
        Logger.Error?.PrintMsg(LogClass.Gpu, $"Background Compilation failed: {e.Message}");

        LinkStatus = ProgramLinkStatus.Failure;
    }
}
// Returns the per-shader stage descriptions, making sure the shaders are ready first.
public PipelineShaderStageCreateInfo[] GetInfos()
{
    EnsureShadersReady();

    return _infos;
}

// Returns the cached dummy render pass, creating it from the stored pipeline state the
// first time (detected by a zero handle).
protected unsafe DisposableRenderPass CreateDummyRenderPass()
{
    if (_dummyRenderPass.Value.Handle == 0)
    {
        _dummyRenderPass = _state.ToRenderPass(_gd, _device);
    }

    return _dummyRenderPass;
}

// Creates a compute pipeline from the first shader using a throwaway pipeline state
// and the device pipeline's cache.
public void CreateBackgroundComputePipeline()
{
    PipelineState computeState = new PipelineState();

    computeState.Initialize();

    // Only the first shader of the program is used as the single stage.
    computeState.Stages[0] = _shaders[0].GetInfo();
    computeState.StagesCount = 1;
    computeState.PipelineLayout = PipelineLayout;

    computeState.CreateComputePipeline(_gd, _device, this, (_gd.Pipeline as PipelineBase).PipelineCache);
    computeState.Dispose();
}
// Reports the link status of the program.
//
// When LinkStatus is already decided (not Incomplete) it is returned as-is. Otherwise every
// shader and the background compile task are inspected:
//  - blocking == false: returns Incomplete as soon as a shader or the background task is
//    still running.
//  - blocking == true: waits for the pending shaders and for the background task.
// The result is Failure when any shader did not compile successfully or when background
// compilation recorded a failure in LinkStatus, and Success otherwise.
//
// This method does not write LinkStatus itself.
public ProgramLinkStatus CheckProgramLink(bool blocking)
{
    if (LinkStatus != ProgramLinkStatus.Incomplete)
    {
        return LinkStatus;
    }

    ProgramLinkStatus resultStatus = ProgramLinkStatus.Success;

    foreach (Shader shader in _shaders)
    {
        if (shader.CompileStatus == ProgramLinkStatus.Incomplete)
        {
            if (!blocking)
            {
                return ProgramLinkStatus.Incomplete;
            }

            // Wait for this shader to finish compiling.
            shader.WaitForCompile();
        }

        // Checked for every shader, not only the ones that were still compiling above:
        // a shader that had already failed before this call must also fail the link.
        if (shader.CompileStatus != ProgramLinkStatus.Success)
        {
            resultStatus = ProgramLinkStatus.Failure;
        }
    }

    if (!_compileTask.IsCompleted)
    {
        if (!blocking)
        {
            return ProgramLinkStatus.Incomplete;
        }

        _compileTask.Wait();
    }

    // Background compilation reports its failures through LinkStatus. Look at it regardless
    // of whether we had to wait, since the task may have finished while the shaders were
    // being inspected.
    if (LinkStatus == ProgramLinkStatus.Failure)
    {
        return ProgramLinkStatus.Failure;
    }

    return resultStatus;
}
// Releases the shaders, cached pipelines and dummy render pass owned by this program.
// Nothing happens when disposing is false, or when the program was no longer registered
// in the device's shader set.
protected virtual unsafe void Dispose(bool disposing)
{
    if (!disposing || !_gd.Shaders.Remove(this))
    {
        return;
    }

    foreach (Shader shader in _shaders)
    {
        shader.Dispose();
    }

    if (_graphicsPipelineCache != null)
    {
        foreach (var cachedPipeline in _graphicsPipelineCache.Values)
        {
            cachedPipeline.Dispose();
        }
    }

    _computePipeline?.Dispose();

    // A zero handle means the dummy render pass was never created.
    if (_dummyRenderPass.Value.Handle != 0)
    {
        _dummyRenderPass.Dispose();
    }
}

// Disposes the program.
public void Dispose() => Dispose(true);
+layout (location = 0) out vec4 colour; + +void main() +{ + colour = texture(tex, tex_coord); +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Vulkan/Shaders/ColorBlitVertexShaderSource.vert b/Ryujinx.Graphics.Vulkan/Shaders/ColorBlitVertexShaderSource.vert new file mode 100644 index 000000000..be93a64dc --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/Shaders/ColorBlitVertexShaderSource.vert @@ -0,0 +1,20 @@ +#version 450 core + +layout (std140, binding = 1) uniform tex_coord_in +{ + vec4 tex_coord_in_data; +}; + +layout (location = 0) out vec2 tex_coord; + +void main() +{ + int low = gl_VertexIndex & 1; + int high = gl_VertexIndex >> 1; + tex_coord.x = tex_coord_in_data[low]; + tex_coord.y = tex_coord_in_data[2 + high]; + gl_Position.x = (float(low) - 0.5f) * 2.0f; + gl_Position.y = (float(high) - 0.5f) * 2.0f; + gl_Position.z = 0.0f; + gl_Position.w = 1.0f; +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Vulkan/Shaders/ColorClearFragmentShaderSource.frag b/Ryujinx.Graphics.Vulkan/Shaders/ColorClearFragmentShaderSource.frag new file mode 100644 index 000000000..ddd4369c4 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/Shaders/ColorClearFragmentShaderSource.frag @@ -0,0 +1,9 @@ +#version 450 core + +layout (location = 0) in vec4 clear_colour; +layout (location = 0) out vec4 colour; + +void main() +{ + colour = clear_colour; +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Vulkan/Shaders/ColorClearVertexShaderSource.vert b/Ryujinx.Graphics.Vulkan/Shaders/ColorClearVertexShaderSource.vert new file mode 100644 index 000000000..2f1b9b2c9 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/Shaders/ColorClearVertexShaderSource.vert @@ -0,0 +1,19 @@ +#version 450 core + +layout (std140, binding = 1) uniform clear_colour_in +{ + vec4 clear_colour_in_data; +}; + +layout (location = 0) out vec4 clear_colour; + +void main() +{ + int low = gl_VertexIndex & 1; + int high = gl_VertexIndex >> 1; + clear_colour = clear_colour_in_data; + gl_Position.x = 
(float(low) - 0.5f) * 2.0f; + gl_Position.y = (float(high) - 0.5f) * 2.0f; + gl_Position.z = 0.0f; + gl_Position.w = 1.0f; +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Vulkan/Shaders/ShaderBinaries.cs b/Ryujinx.Graphics.Vulkan/Shaders/ShaderBinaries.cs new file mode 100644 index 000000000..b21407c3c --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/Shaders/ShaderBinaries.cs @@ -0,0 +1,314 @@ +using System; + +namespace Ryujinx.Graphics.Vulkan.Shaders +{ + static class ShaderBinaries + { + public static readonly byte[] ColorBlitClearAlphaFragmentShaderSource = new byte[] + { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x0A, 0x00, 0x08, 0x00, 0x1B, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, + 0xC2, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x63, 0x6F, 0x6C, 0x6F, + 0x75, 0x72, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x00, + 0x05, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x5F, 0x63, 0x6F, 0x6F, 0x72, + 0x64, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 
0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x03, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x3F, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 
0x00, 0x17, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, + }; + + public static readonly byte[] ColorBlitFragmentShaderSource = new byte[] + { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x0A, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, + 0xC2, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x63, 0x6F, 0x6C, 0x6F, + 0x75, 0x72, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x00, + 0x05, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x5F, 0x63, 0x6F, 0x6F, 0x72, + 0x64, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 
0x00, 0x47, 0x00, 0x04, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x03, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x09, 
0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, + 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, + }; + + public static readonly byte[] ColorBlitVertexShaderSource = new byte[] + { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x0A, 0x00, 0x08, 0x00, 0x3F, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x49, 0x6E, 0x64, + 0x65, 0x78, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x5F, + 0x63, 0x6F, 0x6F, 0x72, 0x64, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x74, 0x65, 0x78, 0x5F, 0x63, 0x6F, 0x6F, 0x72, 0x64, 0x5F, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x08, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x5F, + 0x63, 0x6F, 0x6F, 0x72, 0x64, 0x5F, 0x69, 0x6E, 0x5F, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x2A, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, + 0x2A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 
0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, + 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x07, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, + 0x6E, 0x63, 0x65, 0x00, 0x06, 0x00, 0x07, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x43, 0x75, 0x6C, 0x6C, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, + 0x05, 0x00, 0x03, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x2A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, + 0x2A, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 
0x00, + 0x09, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x06, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x2B, 0x00, 0x00, 0x00, 0x2C, 
0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3F, 0x2B, 0x00, 0x04, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x2B, 0x00, 0x04, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, 0x2B, 0x00, 0x04, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xC7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x1B, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 
0x00, 0x28, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x0D, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x85, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x32, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x33, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x6F, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x83, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, + 0x2F, 0x00, 0x00, 0x00, 0x85, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x3E, 0x00, 0x03, 0x00, 0x38, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x3A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x3B, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x20, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x3E, 0x00, 0x00, 0x00, + 0x3C, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, + }; + + public static readonly byte[] ColorClearFragmentShaderSource = new byte[] + { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x0A, 0x00, 0x08, 0x00, 0x0D, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 
0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, + 0xC2, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x63, 0x6F, 0x6C, 0x6F, + 0x75, 0x72, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x63, 0x6C, 0x65, 0x61, + 0x72, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x75, 0x72, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x07, 
0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, + }; + + public static readonly byte[] ColorClearVertexShaderSource = new byte[] + { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x0A, 0x00, 0x08, 0x00, 0x36, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0F, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, + 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x0A, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x49, 0x6E, 0x64, + 0x65, 0x78, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x14, 0x00, 0x00, 0x00, 0x63, 0x6C, 0x65, 0x61, + 0x72, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x75, 0x72, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x15, 0x00, 0x00, 0x00, 0x63, 0x6C, 0x65, 0x61, 0x72, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x75, 0x72, + 0x5F, 0x69, 0x6E, 0x00, 0x06, 0x00, 0x09, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x63, 0x6C, 0x65, 0x61, 0x72, 0x5F, 0x63, 0x6F, 0x6C, 0x6F, 0x75, 0x72, 0x5F, 0x69, 0x6E, 0x5F, + 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x50, + 0x65, 0x72, 0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 
0x6C, 0x5F, 0x50, 0x6F, 0x73, 0x69, 0x74, + 0x69, 0x6F, 0x6E, 0x00, 0x06, 0x00, 0x07, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x67, 0x6C, 0x5F, 0x50, 0x6F, 0x69, 0x6E, 0x74, 0x53, 0x69, 0x7A, 0x65, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x07, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, + 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, 0x06, 0x00, 0x07, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, 0x75, 0x6C, 0x6C, 0x44, + 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, 0x05, 0x00, 0x03, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x2A, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x1F, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 
0x00, + 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x17, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x3B, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, + 0x1C, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, + 0x1C, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x06, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, + 0x1E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x1F, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x3F, 0x2B, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x40, 0x2B, 
0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x29, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x3F, 0x2B, 0x00, 0x04, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, + 0xC7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, + 0x0C, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x1A, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, + 0x12, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x85, 0x00, 0x05, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x29, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x2A, 0x00, 0x00, 0x00, + 0x27, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 
0x00, 0x2C, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, + 0x2C, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x85, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x2E, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x29, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x1D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x06, 0x00, 0x29, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x32, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x29, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, + 0x35, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, + }; + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Vulkan/StagingBuffer.cs b/Ryujinx.Graphics.Vulkan/StagingBuffer.cs new file mode 100644 index 000000000..441ff119a --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/StagingBuffer.cs @@ -0,0 +1,194 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; + +namespace Ryujinx.Graphics.Vulkan +{ + class StagingBuffer : IDisposable + { + private const int BufferSize = 16 * 1024 * 1024; + + private int _freeOffset; + private int _freeSize; + + private readonly VulkanGraphicsDevice _gd; + private readonly BufferHolder _buffer; + + private struct PendingCopy + { + public FenceHolder Fence { get; } + public int Size { get; } + + public PendingCopy(FenceHolder fence, int size) + { + Fence = fence; + Size = size; + fence.Get(); + } + } + + private readonly Queue _pendingCopies; + + public StagingBuffer(VulkanGraphicsDevice gd, BufferManager bufferManager) + { + _gd = gd; + _buffer = 
bufferManager.Create(gd, BufferSize);
+            // NOTE(review): generic type arguments were stripped when this patch was flattened;
+            // PendingCopy is the element type that Enqueue/TryPeek/Dequeue operate on in this class.
+            _pendingCopies = new Queue<PendingCopy>();
+            _freeSize = BufferSize;
+        }
+
+        /// <summary>
+        /// Copies <paramref name="data"/> into <paramref name="dst"/> through the staging ring buffer,
+        /// splitting it into chunks whenever less staging space than data is currently free.
+        /// </summary>
+        /// <param name="cbp">Pool used to rent a command buffer when <paramref name="cbs"/> is null, and to recycle one while waiting for space.</param>
+        /// <param name="cbs">Command buffer to record the copies on; when null, one is rented and disposed before returning.</param>
+        /// <param name="endRenderPass">Optional callback, invoked once before any copy is recorded.</param>
+        /// <param name="dst">Destination buffer.</param>
+        /// <param name="dstOffset">Offset into <paramref name="dst"/> where the first chunk is written.</param>
+        /// <param name="data">Data to upload (element type reconstructed as byte: offsets and sizes are advanced by <c>data.Length</c> - TODO confirm).</param>
+        public unsafe void PushData(CommandBufferPool cbp, CommandBufferScoped? cbs, Action endRenderPass, BufferHolder dst, int dstOffset, ReadOnlySpan<byte> data)
+        {
+            bool isRender = cbs != null;
+            CommandBufferScoped scoped = cbs ?? cbp.Rent();
+
+            // Must push all data to the buffer. If it can't fit, split it up.
+
+            endRenderPass?.Invoke();
+
+            while (data.Length > 0)
+            {
+                // Cheap, non-blocking reclaim first.
+                if (_freeSize < data.Length)
+                {
+                    FreeCompleted();
+                }
+
+                // Nothing free at all: reclaim the oldest pending copy, blocking on its fence if needed.
+                while (_freeSize == 0)
+                {
+                    // False means the oldest fence is unsignaled and sits on a command buffer that is
+                    // still rented, so the work has to be submitted before it can ever signal.
+                    if (!WaitFreeCompleted(cbp))
+                    {
+                        if (isRender)
+                        {
+                            // Flush everything and continue on a freshly rented command buffer,
+                            // which is disposed at the end like any other rented one.
+                            _gd.FlushAllCommands();
+                            scoped = cbp.Rent();
+                            isRender = false;
+                        }
+                        else
+                        {
+                            scoped = cbp.ReturnAndRent(scoped);
+                        }
+                    }
+                }
+
+                int chunkSize = Math.Min(_freeSize, data.Length);
+
+                PushDataImpl(scoped, dst, dstOffset, data.Slice(0, chunkSize));
+
+                dstOffset += chunkSize;
+                data = data.Slice(chunkSize);
+            }
+
+            if (!isRender)
+            {
+                scoped.Dispose();
+            }
+        }
+
+        /// <summary>
+        /// Writes <paramref name="data"/> into the staging buffer at the current free offset (wrapping to the
+        /// start when it would run past the end), records the staging-to-destination copy on
+        /// <paramref name="cbs"/>, and queues a <c>PendingCopy</c> on the command buffer's fence so the
+        /// space can be reclaimed later. Callers must ensure <c>data.Length</c> does not exceed the free size.
+        /// </summary>
+        private void PushDataImpl(CommandBufferScoped cbs, BufferHolder dst, int dstOffset, ReadOnlySpan<byte> data)
+        {
+            var srcBuffer = _buffer.GetBuffer();
+            var dstBuffer = dst.GetBuffer();
+
+            int offset = _freeOffset;
+            int capacity = BufferSize - offset;
+            if (capacity < data.Length)
+            {
+                // The write crosses the end of the staging buffer: split it into tail + head.
+                _buffer.SetDataUnchecked(offset, data.Slice(0, capacity));
+                _buffer.SetDataUnchecked(0, data.Slice(capacity));
+
+                BufferHolder.Copy(_gd, cbs, srcBuffer, dstBuffer, offset, dstOffset, capacity);
+                BufferHolder.Copy(_gd, cbs, srcBuffer, dstBuffer, 0, dstOffset + capacity, data.Length - capacity);
+            }
+            else
+            {
+                _buffer.SetDataUnchecked(offset, data);
+
+                BufferHolder.Copy(_gd, cbs, srcBuffer, dstBuffer, offset, dstOffset, data.Length);
+            }
+
+            // BufferSize is a power of two, so the mask is equivalent to a modulo.
+            _freeOffset = (offset + data.Length) & (BufferSize - 1);
+            _freeSize -= data.Length;
+            Debug.Assert(_freeSize >= 0);
+
+            _pendingCopies.Enqueue(new PendingCopy(cbs.GetFence(), data.Length));
+        }
+
+        public unsafe bool TryPushData(CommandBufferScoped cbs, Action
endRenderPass, BufferHolder dst, int dstOffset, ReadOnlySpan<byte> data)
+        {
+            // Single-shot variant of PushData: returns false instead of waiting or splitting
+            // when the data cannot fit in the staging space that is free right now.
+            if (data.Length > BufferSize)
+            {
+                return false;
+            }
+
+            if (_freeSize < data.Length)
+            {
+                FreeCompleted();
+
+                if (_freeSize < data.Length)
+                {
+                    return false;
+                }
+            }
+
+            // Null-safe invocation, consistent with PushData.
+            endRenderPass?.Invoke();
+
+            PushDataImpl(cbs, dst, dstOffset, data);
+
+            return true;
+        }
+
+        /// <summary>
+        /// Reclaims the staging space of the oldest pending copy, blocking on its fence if required.
+        /// </summary>
+        /// <returns>
+        /// False, without waiting, when that fence is unsignaled and <paramref name="cbp"/> reports it is on a
+        /// rented command buffer; true otherwise, including when nothing is pending.
+        /// </returns>
+        private bool WaitFreeCompleted(CommandBufferPool cbp)
+        {
+            if (_pendingCopies.TryPeek(out var pc))
+            {
+                if (!pc.Fence.IsSignaled())
+                {
+                    if (cbp.IsFenceOnRentedCommandBuffer(pc.Fence))
+                    {
+                        return false;
+                    }
+
+                    pc.Fence.Wait();
+                }
+
+                var dequeued = _pendingCopies.Dequeue();
+                Debug.Assert(dequeued.Fence == pc.Fence);
+                _freeSize += pc.Size;
+                pc.Fence.Put();
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Reclaims, without blocking, the space of every leading pending copy whose fence is already signaled.
+        /// </summary>
+        private void FreeCompleted()
+        {
+            FenceHolder signalledFence = null;
+            while (_pendingCopies.TryPeek(out var pc) && (pc.Fence == signalledFence || pc.Fence.IsSignaled()))
+            {
+                signalledFence = pc.Fence; // Already checked - don't need to do it again.
+                var dequeued = _pendingCopies.Dequeue();
+                Debug.Assert(dequeued.Fence == pc.Fence);
+                _freeSize += pc.Size;
+                pc.Fence.Put();
+            }
+        }
+
+        /// <summary>
+        /// Disposes the staging buffer and releases the fence reference held by each pending copy.
+        /// </summary>
+        protected virtual void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                _buffer.Dispose();
+
+                while (_pendingCopies.TryDequeue(out var pc))
+                {
+                    pc.Fence.Put();
+                }
+            }
+        }
+
+        public void Dispose()
+        {
+            Dispose(true);
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/SyncManager.cs b/Ryujinx.Graphics.Vulkan/SyncManager.cs
new file mode 100644
index 000000000..a0b2e7df7
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/SyncManager.cs
@@ -0,0 +1,122 @@
+using Ryujinx.Common.Logging;
+using Silk.NET.Vulkan;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    /// <summary>
+    /// Tracks sync handles by id, each backed by a <see cref="MultiFenceHolder"/> that can be waited on.
+    /// </summary>
+    class SyncManager
+    {
+        private class SyncHandle
+        {
+            public ulong ID;
+            public MultiFenceHolder Waitable;
+        }
+
+        // One past the id of the most recently removed handle; ids below it are treated as done.
+        private ulong _firstHandle = 0;
+
+        private readonly VulkanGraphicsDevice _gd;
+        private readonly Device _device;
+        // NOTE(review): element type restored after extraction stripped it; the list only ever holds SyncHandle.
+        private List<SyncHandle> _handles;
+
+        public SyncManager(VulkanGraphicsDevice gd, Device device)
+        {
+            _gd = gd;
+            _device = device;
+            _handles = new List<SyncHandle>();
+        }
+
+        /// <summary>
+        /// Flushes all pending commands and registers a new sync handle with the given id.
+        /// </summary>
+        public void Create(ulong id)
+        {
+            MultiFenceHolder waitable = new MultiFenceHolder();
+
+            _gd.FlushAllCommands();
+            _gd.CommandBufferPool.AddWaitable(waitable);
+
+            SyncHandle handle = new SyncHandle
+            {
+                ID = id,
+                Waitable = waitable
+            };
+
+            lock (_handles)
+            {
+                _handles.Add(handle);
+            }
+        }
+
+        /// <summary>
+        /// Waits for the sync handle with the given id; returns immediately when the id is older than the
+        /// first live handle, is unknown, or its waitable has already been cleared by <see cref="Cleanup"/>.
+        /// </summary>
+        public void Wait(ulong id)
+        {
+            SyncHandle result = null;
+
+            lock (_handles)
+            {
+                // Signed view of the unsigned difference: positive when id is below _firstHandle.
+                if ((long)(_firstHandle - id) > 0)
+                {
+                    return; // The handle has already been signalled or deleted.
+                }
+
+                foreach (SyncHandle handle in _handles)
+                {
+                    if (handle.ID == id)
+                    {
+                        result = handle;
+                        break;
+                    }
+                }
+            }
+
+            if (result != null)
+            {
+                lock (result)
+                {
+                    if (result.Waitable == null)
+                    {
+                        return;
+                    }
+
+                    // Timeout is presumably in nanoseconds (the message below reports it as 1000ms) - TODO confirm.
+                    bool signaled = result.Waitable.WaitForFences(_gd.Api, _device, 1000000000);
+                    if (!signaled)
+                    {
+                        Logger.Error?.PrintMsg(LogClass.Gpu, $"VK Sync Object {result.ID} failed to signal within 1000ms. Continuing...");
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Removes handles from the front of the list for as long as they are already signaled.
+        /// </summary>
+        public void Cleanup()
+        {
+            // Iterate through handles and remove any that have already been signalled.
+
+            while (true)
+            {
+                SyncHandle first = null;
+                lock (_handles)
+                {
+                    first = _handles.FirstOrDefault();
+                }
+
+                if (first == null) break;
+
+                // Zero timeout: poll only, never block here.
+                bool signaled = first.Waitable.WaitForFences(_gd.Api, _device, 0);
+                if (signaled)
+                {
+                    // Delete the sync object.
+                    lock (_handles)
+                    {
+                        lock (first)
+                        {
+                            _firstHandle = first.ID + 1;
+                            _handles.RemoveAt(0);
+                            first.Waitable = null;
+                        }
+                    }
+                }
+                else
+                {
+                    // This sync handle and any following have not been reached yet.
+                    break;
+                }
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/TextureBuffer.cs b/Ryujinx.Graphics.Vulkan/TextureBuffer.cs
new file mode 100644
index 000000000..941f41b41
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/TextureBuffer.cs
@@ -0,0 +1,150 @@
+using Ryujinx.Graphics.GAL;
+using Silk.NET.Vulkan;
+using System;
+using System.Collections.Generic;
+using VkFormat = Silk.NET.Vulkan.Format;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    /// <summary>
+    /// <see cref="ITexture"/> implementation backed by a range of a buffer and exposed through buffer views.
+    /// Copy, view-creation and per-layer upload operations are not supported.
+    /// </summary>
+    class TextureBuffer : ITexture
+    {
+        private readonly VulkanGraphicsDevice _gd;
+
+        private BufferHandle _bufferHandle;
+        private int _offset;
+        private int _size;
+        // NOTE(review): the generic arguments on the next two fields were stripped by extraction and have been
+        // reconstructed; confirm Auto<DisposableBufferView> against the return type of BufferManager.CreateView.
+        private Auto<DisposableBufferView> _bufferView;
+        private Dictionary<GAL.Format, Auto<DisposableBufferView>> _selfManagedViews;
+
+        public int Width { get; }
+        public int Height { get; }
+
+        public VkFormat VkFormat { get; }
+
+        public float ScaleFactor { get; }
+
+        /// <summary>
+        /// Creates the texture buffer and registers it in <c>gd.Textures</c>; storage is bound later via <see cref="SetStorage"/>.
+        /// </summary>
+        public TextureBuffer(VulkanGraphicsDevice gd, TextureCreateInfo info, float scale)
+        {
+            _gd = gd;
+            Width = info.Width;
+            Height = info.Height;
+            VkFormat = FormatTable.GetFormat(info.Format);
+            ScaleFactor = scale;
+
+            gd.Textures.Add(this);
+        }
+
+        public void CopyTo(ITexture destination, int firstLayer, int firstLevel)
+        {
+            throw new NotSupportedException();
+        }
+
+        public void CopyTo(ITexture destination, int srcLayer, int dstLayer, int srcLevel, int dstLevel)
+        {
+            throw new NotSupportedException();
+        }
+
+        public void CopyTo(ITexture destination, Extents2D srcRegion, Extents2D dstRegion, bool linearFilter)
+        {
+            throw new NotSupportedException();
+        }
+
+        public ITexture CreateView(TextureCreateInfo info, int firstLayer, int firstLevel)
+        {
+            throw new NotSupportedException();
+        }
+
+        /// <summary>
+        /// Reads back the bound buffer range.
+        /// </summary>
+        public ReadOnlySpan<byte> GetData()
+        {
+            return _gd.GetBufferData(_bufferHandle, _offset, _size);
+        }
+
+        /// <summary>
+        /// Layer and level are ignored; returns the same data as <see cref="GetData()"/>.
+        /// </summary>
+        public ReadOnlySpan<byte> GetData(int layer, int level)
+        {
+            return GetData();
+        }
+
+        /// <summary>
+        /// Unregisters the texture from the device and disposes its views; does nothing if it was already removed.
+        /// </summary>
+        public void Release()
+        {
+            if (_gd.Textures.Remove(this))
+            {
+                ReleaseImpl();
+            }
+        }
+
+        /// <summary>
+        /// Disposes and forgets every cached buffer view (the default one and the per-format ones).
+        /// </summary>
+        private void ReleaseImpl()
+        {
+            if (_selfManagedViews != null)
+            {
+                foreach (var bufferView in _selfManagedViews.Values)
+                {
+                    bufferView.Dispose();
+                }
+
+                _selfManagedViews = null;
+            }
+
+            _bufferView?.Dispose();
+            _bufferView = null;
+        }
+
+        /// <summary>
+        /// Writes <paramref name="data"/> to the bound buffer at the bound offset.
+        /// </summary>
+        public void SetData(ReadOnlySpan<byte> data)
+        {
+            _gd.SetBufferData(_bufferHandle, _offset, data);
+        }
+
+        public void SetData(ReadOnlySpan<byte> data, int layer, int level)
+        {
+            throw new NotSupportedException();
+        }
+
+        /// <summary>
+        /// Binds a new buffer range; cached views are dropped because they refer to the previous range.
+        /// Rebinding the identical range is a no-op.
+        /// </summary>
+        public void SetStorage(BufferRange buffer)
+        {
+            if (_bufferHandle == buffer.Handle &&
+                _offset == buffer.Offset &&
+                _size == buffer.Size)
+            {
+                return;
+            }
+
+            _bufferHandle = buffer.Handle;
+            _offset = buffer.Offset;
+            _size = buffer.Size;
+
+            ReleaseImpl();
+        }
+
+        /// <summary>
+        /// Returns the buffer view in the texture's own format, creating and caching it on first use.
+        /// Yields <c>default</c> when the view could not be created.
+        /// </summary>
+        public BufferView GetBufferView(CommandBufferScoped cbs)
+        {
+            if (_bufferView == null)
+            {
+                _bufferView = _gd.BufferManager.CreateView(_bufferHandle, VkFormat, _offset, _size);
+            }
+
+            return _bufferView?.Get(cbs, _offset, _size).Value ?? default;
+        }
+
+        /// <summary>
+        /// Returns a buffer view reinterpreting the storage as <paramref name="format"/>. Views in a format other
+        /// than the texture's own are cached per format and disposed together with the texture.
+        /// </summary>
+        public BufferView GetBufferView(CommandBufferScoped cbs, GAL.Format format)
+        {
+            var vkFormat = FormatTable.GetFormat(format);
+            if (vkFormat == VkFormat)
+            {
+                return GetBufferView(cbs);
+            }
+
+            if (_selfManagedViews != null && _selfManagedViews.TryGetValue(format, out var bufferView))
+            {
+                return bufferView.Get(cbs, _offset, _size).Value;
+            }
+
+            bufferView = _gd.BufferManager.CreateView(_bufferHandle, vkFormat, _offset, _size);
+
+            // Only successfully created views are cached, so a failed creation is retried on the next call.
+            if (bufferView != null)
+            {
+                (_selfManagedViews ??= new Dictionary<GAL.Format, Auto<DisposableBufferView>>()).Add(format, bufferView);
+            }
+
+            return bufferView?.Get(cbs, _offset, _size).Value ??
default; + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/TextureCopy.cs b/Ryujinx.Graphics.Vulkan/TextureCopy.cs new file mode 100644 index 000000000..05e110936 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/TextureCopy.cs @@ -0,0 +1,359 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.GAL; +using Silk.NET.Vulkan; +using System; +using System.Numerics; + +namespace Ryujinx.Graphics.Vulkan +{ + static class TextureCopy + { + public static void Blit( + Vk api, + CommandBuffer commandBuffer, + Image srcImage, + Image dstImage, + TextureCreateInfo srcInfo, + TextureCreateInfo dstInfo, + Extents2D srcRegion, + Extents2D dstRegion, + int srcLayer, + int dstLayer, + int srcLevel, + int dstLevel, + int layers, + int levels, + bool linearFilter, + ImageAspectFlags srcAspectFlags = 0, + ImageAspectFlags dstAspectFlags = 0) + { + static (Offset3D, Offset3D) ExtentsToOffset3D(Extents2D extents, int width, int height, int level) + { + static int Clamp(int value, int max) + { + return Math.Clamp(value, 0, max); + } + + var xy1 = new Offset3D(Clamp(extents.X1, width) >> level, Clamp(extents.Y1, height) >> level, 0); + var xy2 = new Offset3D(Clamp(extents.X2, width) >> level, Clamp(extents.Y2, height) >> level, 1); + + return (xy1, xy2); + } + + if (srcAspectFlags == 0) + { + srcAspectFlags = srcInfo.Format.ConvertAspectFlags(); + } + + if (dstAspectFlags == 0) + { + dstAspectFlags = dstInfo.Format.ConvertAspectFlags(); + } + + var srcOffsets = new ImageBlit.SrcOffsetsBuffer(); + var dstOffsets = new ImageBlit.DstOffsetsBuffer(); + + var filter = linearFilter && !dstInfo.Format.IsDepthOrStencil() ? 
Filter.Linear : Filter.Nearest; + + TextureView.InsertImageBarrier( + api, + commandBuffer, + srcImage, + TextureStorage.DefaultAccessMask, + AccessFlags.AccessTransferReadBit, + PipelineStageFlags.PipelineStageAllCommandsBit, + PipelineStageFlags.PipelineStageTransferBit, + srcAspectFlags, + srcLayer, + srcLevel, + layers, + levels); + + uint copySrcLevel = (uint)srcLevel; + uint copyDstLevel = (uint)dstLevel; + + for (int level = 0; level < levels; level++) + { + var srcSl = new ImageSubresourceLayers(srcAspectFlags, copySrcLevel, (uint)srcLayer, (uint)layers); + var dstSl = new ImageSubresourceLayers(dstAspectFlags, copyDstLevel, (uint)dstLayer, (uint)layers); + + (srcOffsets.Element0, srcOffsets.Element1) = ExtentsToOffset3D(srcRegion, srcInfo.Width, srcInfo.Height, level); + (dstOffsets.Element0, dstOffsets.Element1) = ExtentsToOffset3D(dstRegion, dstInfo.Width, dstInfo.Height, level); + + var region = new ImageBlit() + { + SrcSubresource = srcSl, + SrcOffsets = srcOffsets, + DstSubresource = dstSl, + DstOffsets = dstOffsets + }; + + api.CmdBlitImage(commandBuffer, srcImage, ImageLayout.General, dstImage, ImageLayout.General, 1, region, filter); + + copySrcLevel++; + copyDstLevel++; + + if (srcInfo.Target == Target.Texture3D || dstInfo.Target == Target.Texture3D) + { + layers = Math.Max(1, layers >> 1); + } + } + + TextureView.InsertImageBarrier( + api, + commandBuffer, + dstImage, + AccessFlags.AccessTransferWriteBit, + TextureStorage.DefaultAccessMask, + PipelineStageFlags.PipelineStageTransferBit, + PipelineStageFlags.PipelineStageAllCommandsBit, + dstAspectFlags, + dstLayer, + dstLevel, + layers, + levels); + } + + public static void Copy( + Vk api, + CommandBuffer commandBuffer, + Image srcImage, + Image dstImage, + TextureCreateInfo srcInfo, + TextureCreateInfo dstInfo, + int srcViewLayer, + int dstViewLayer, + int srcViewLevel, + int dstViewLevel, + int srcLayer, + int dstLayer, + int srcLevel, + int dstLevel) + { + int srcDepth = 
srcInfo.GetDepthOrLayers(); + int srcLevels = srcInfo.Levels; + + int dstDepth = dstInfo.GetDepthOrLayers(); + int dstLevels = dstInfo.Levels; + + if (dstInfo.Target == Target.Texture3D) + { + dstDepth = Math.Max(1, dstDepth >> dstLevel); + } + + int depth = Math.Min(srcDepth, dstDepth); + int levels = Math.Min(srcLevels, dstLevels); + + Copy( + api, + commandBuffer, + srcImage, + dstImage, + srcInfo, + dstInfo, + srcViewLayer, + dstViewLayer, + srcViewLevel, + dstViewLevel, + srcLayer, + dstLayer, + srcLevel, + dstLevel, + depth, + levels); + } + + private static int ClampLevels(TextureCreateInfo info, int levels) + { + int width = info.Width; + int height = info.Height; + int depth = info.Target == Target.Texture3D ? info.Depth : 1; + + int maxLevels = 1 + BitOperations.Log2((uint)Math.Max(Math.Max(width, height), depth)); + + if (levels > maxLevels) + { + levels = maxLevels; + } + + return levels; + } + + public static void Copy( + Vk api, + CommandBuffer commandBuffer, + Image srcImage, + Image dstImage, + TextureCreateInfo srcInfo, + TextureCreateInfo dstInfo, + int srcViewLayer, + int dstViewLayer, + int srcViewLevel, + int dstViewLevel, + int srcDepthOrLayer, + int dstDepthOrLayer, + int srcLevel, + int dstLevel, + int depthOrLayers, + int levels) + { + int srcZ; + int srcLayer; + int srcDepth; + int srcLayers; + + if (srcInfo.Target == Target.Texture3D) + { + srcZ = srcDepthOrLayer; + srcLayer = 0; + srcDepth = depthOrLayers; + srcLayers = 1; + } + else + { + srcZ = 0; + srcLayer = srcDepthOrLayer; + srcDepth = 1; + srcLayers = depthOrLayers; + } + + int dstZ; + int dstLayer; + int dstDepth; + int dstLayers; + + if (dstInfo.Target == Target.Texture3D) + { + dstZ = dstDepthOrLayer; + dstLayer = 0; + dstDepth = depthOrLayers; + dstLayers = 1; + } + else + { + dstZ = 0; + dstLayer = dstDepthOrLayer; + dstDepth = 1; + dstLayers = depthOrLayers; + } + + int srcWidth = srcInfo.Width; + int srcHeight = srcInfo.Height; + + int dstWidth = dstInfo.Width; + int 
dstHeight = dstInfo.Height; + + srcWidth = Math.Max(1, srcWidth >> srcLevel); + srcHeight = Math.Max(1, srcHeight >> srcLevel); + + dstWidth = Math.Max(1, dstWidth >> dstLevel); + dstHeight = Math.Max(1, dstHeight >> dstLevel); + + int blockWidth = 1; + int blockHeight = 1; + bool sizeInBlocks = false; + + // When copying from a compressed to a non-compressed format, + // the non-compressed texture will have the size of the texture + // in blocks (not in texels), so we must adjust that size to + // match the size in texels of the compressed texture. + if (!srcInfo.IsCompressed && dstInfo.IsCompressed) + { + srcWidth *= dstInfo.BlockWidth; + srcHeight *= dstInfo.BlockHeight; + blockWidth = dstInfo.BlockWidth; + blockHeight = dstInfo.BlockHeight; + + sizeInBlocks = true; + } + else if (srcInfo.IsCompressed && !dstInfo.IsCompressed) + { + dstWidth *= srcInfo.BlockWidth; + dstHeight *= srcInfo.BlockHeight; + blockWidth = srcInfo.BlockWidth; + blockHeight = srcInfo.BlockHeight; + } + + int width = Math.Min(srcWidth, dstWidth); + int height = Math.Min(srcHeight, dstHeight); + + ImageAspectFlags srcAspect = srcInfo.Format.ConvertAspectFlags(); + ImageAspectFlags dstAspect = dstInfo.Format.ConvertAspectFlags(); + + TextureView.InsertImageBarrier( + api, + commandBuffer, + srcImage, + TextureStorage.DefaultAccessMask, + AccessFlags.AccessTransferReadBit, + PipelineStageFlags.PipelineStageAllCommandsBit, + PipelineStageFlags.PipelineStageTransferBit, + srcAspect, + srcViewLayer + srcLayer, + srcViewLevel + srcLevel, + srcLayers, + levels); + + for (int level = 0; level < levels; level++) + { + // Stop copy if we are already out of the levels range. 
+ if (level >= srcInfo.Levels || dstLevel + level >= dstInfo.Levels) + { + break; + } + + var srcSl = new ImageSubresourceLayers( + srcAspect, + (uint)(srcViewLevel + srcLevel + level), + (uint)(srcViewLayer + srcLayer), + (uint)srcLayers); + + var dstSl = new ImageSubresourceLayers( + dstAspect, + (uint)(dstViewLevel + dstLevel + level), + (uint)(dstViewLayer + dstLayer), + (uint)dstLayers); + + int copyWidth = sizeInBlocks ? BitUtils.DivRoundUp(width, blockWidth) : width; + int copyHeight = sizeInBlocks ? BitUtils.DivRoundUp(height, blockHeight) : height; + + var extent = new Extent3D((uint)copyWidth, (uint)copyHeight, (uint)srcDepth); + + if (srcInfo.Samples > 1 && srcInfo.Samples != dstInfo.Samples) + { + var region = new ImageResolve(srcSl, new Offset3D(0, 0, srcZ), dstSl, new Offset3D(0, 0, dstZ), extent); + + api.CmdResolveImage(commandBuffer, srcImage, ImageLayout.General, dstImage, ImageLayout.General, 1, region); + } + else + { + var region = new ImageCopy(srcSl, new Offset3D(0, 0, srcZ), dstSl, new Offset3D(0, 0, dstZ), extent); + + api.CmdCopyImage(commandBuffer, srcImage, ImageLayout.General, dstImage, ImageLayout.General, 1, region); + } + + width = Math.Max(1, width >> 1); + height = Math.Max(1, height >> 1); + + if (srcInfo.Target == Target.Texture3D) + { + srcDepth = Math.Max(1, srcDepth >> 1); + } + } + + TextureView.InsertImageBarrier( + api, + commandBuffer, + dstImage, + AccessFlags.AccessTransferWriteBit, + TextureStorage.DefaultAccessMask, + PipelineStageFlags.PipelineStageTransferBit, + PipelineStageFlags.PipelineStageAllCommandsBit, + dstAspect, + dstViewLayer + dstLayer, + dstViewLevel + dstLevel, + dstLayers, + levels); + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/TextureStorage.cs b/Ryujinx.Graphics.Vulkan/TextureStorage.cs new file mode 100644 index 000000000..2068324cb --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/TextureStorage.cs @@ -0,0 +1,479 @@ +using Ryujinx.Graphics.GAL; +using Silk.NET.Vulkan; +using System; +using 
System.Collections.Generic; +using System.Numerics; +using VkBuffer = Silk.NET.Vulkan.Buffer; +using VkFormat = Silk.NET.Vulkan.Format; + +namespace Ryujinx.Graphics.Vulkan +{ + class TextureStorage : IDisposable + { + private const MemoryPropertyFlags DefaultImageMemoryFlags = + MemoryPropertyFlags.MemoryPropertyDeviceLocalBit; + + private const ImageUsageFlags DefaultUsageFlags = + ImageUsageFlags.ImageUsageSampledBit | + ImageUsageFlags.ImageUsageTransferSrcBit | + ImageUsageFlags.ImageUsageTransferDstBit; + + public const AccessFlags DefaultAccessMask = + AccessFlags.AccessShaderReadBit | + AccessFlags.AccessShaderWriteBit | + AccessFlags.AccessColorAttachmentReadBit | + AccessFlags.AccessColorAttachmentWriteBit | + AccessFlags.AccessDepthStencilAttachmentReadBit | + AccessFlags.AccessDepthStencilAttachmentWriteBit | + AccessFlags.AccessTransferReadBit | + AccessFlags.AccessTransferWriteBit; + + private readonly VulkanGraphicsDevice _gd; + + private readonly Device _device; + + private TextureCreateInfo _info; + + public TextureCreateInfo Info => _info; + + private readonly Image _image; + private readonly Auto _imageAuto; + private readonly Auto _allocationAuto; + private Auto _foreignAllocationAuto; + + private Dictionary _aliasedStorages; + + private AccessFlags _lastModificationAccess; + private PipelineStageFlags _lastModificationStage; + + public VkFormat VkFormat { get; } + public float ScaleFactor { get; } + + public unsafe TextureStorage( + VulkanGraphicsDevice gd, + PhysicalDevice physicalDevice, + Device device, + TextureCreateInfo info, + float scaleFactor, + Auto foreignAllocation = null) + { + _gd = gd; + _device = device; + _info = info; + ScaleFactor = scaleFactor; + + var format = _gd.FormatCapabilities.ConvertToVkFormat(info.Format); + var levels = (uint)info.Levels; + var layers = (uint)info.GetLayers(); + var depth = (uint)(info.Target == Target.Texture3D ? 
info.Depth : 1); + + VkFormat = format; + + var type = info.Target.Convert(); + + var extent = new Extent3D((uint)info.Width, (uint)info.Height, depth); + + var sampleCountFlags = ConvertToSampleCountFlags((uint)info.Samples); + + var usage = DefaultUsageFlags; + + if (info.Format.IsDepthOrStencil()) + { + usage |= ImageUsageFlags.ImageUsageDepthStencilAttachmentBit; + } + else if (info.Format.IsRtColorCompatible()) + { + usage |= ImageUsageFlags.ImageUsageColorAttachmentBit; + } + + if (info.Format.IsImageCompatible()) + { + usage |= ImageUsageFlags.ImageUsageStorageBit; + } + + var flags = ImageCreateFlags.ImageCreateMutableFormatBit; + + bool cubeCompatible = info.Width == info.Height && layers >= 6; + + if (type == ImageType.ImageType2D && cubeCompatible) + { + flags |= ImageCreateFlags.ImageCreateCubeCompatibleBit; + } + + if (type == ImageType.ImageType3D) + { + flags |= ImageCreateFlags.ImageCreate2DArrayCompatibleBit; + } + + // System.Console.WriteLine("create image " + type + " " + format + " " + levels + " " + layers + " " + usage + " " + flags); + + var imageCreateInfo = new ImageCreateInfo() + { + SType = StructureType.ImageCreateInfo, + ImageType = type, + Format = format, + Extent = extent, + MipLevels = levels, + ArrayLayers = layers, + Samples = sampleCountFlags, + Tiling = ImageTiling.Optimal, + Usage = usage, + SharingMode = SharingMode.Exclusive, + InitialLayout = ImageLayout.Undefined, + Flags = flags + }; + + gd.Api.CreateImage(device, imageCreateInfo, null, out _image).ThrowOnError(); + + if (foreignAllocation == null) + { + gd.Api.GetImageMemoryRequirements(device, _image, out var requirements); + var allocation = gd.MemoryAllocator.AllocateDeviceMemory(physicalDevice, requirements, DefaultImageMemoryFlags); + + if (allocation.Memory.Handle == 0UL) + { + gd.Api.DestroyImage(device, _image, null); + throw new Exception("Image initialization failed."); + } + + gd.Api.BindImageMemory(device, _image, allocation.Memory, 
allocation.Offset).ThrowOnError(); + + _allocationAuto = new Auto(allocation); + _imageAuto = new Auto(new DisposableImage(_gd.Api, device, _image), null, _allocationAuto); + + InitialTransition(ImageLayout.Undefined, ImageLayout.General); + } + else + { + _foreignAllocationAuto = foreignAllocation; + foreignAllocation.IncrementReferenceCount(); + var allocation = foreignAllocation.GetUnsafe(); + + gd.Api.BindImageMemory(device, _image, allocation.Memory, allocation.Offset).ThrowOnError(); + + _imageAuto = new Auto(new DisposableImage(_gd.Api, device, _image)); + + InitialTransition(ImageLayout.Preinitialized, ImageLayout.General); + } + } + + public TextureStorage CreateAliasedColorForDepthStorageUnsafe(GAL.Format format) + { + var colorFormat = format switch + { + GAL.Format.S8Uint => GAL.Format.R8Unorm, + GAL.Format.D16Unorm => GAL.Format.R16Unorm, + GAL.Format.S8UintD24Unorm => GAL.Format.R8G8B8A8Unorm, + GAL.Format.D32Float => GAL.Format.R32Float, + GAL.Format.D24UnormS8Uint => GAL.Format.R8G8B8A8Unorm, + GAL.Format.D32FloatS8Uint => GAL.Format.R32G32Float, + _ => throw new ArgumentException($"\"{format}\" is not a supported depth or stencil format.") + }; + + return CreateAliasedStorageUnsafe(colorFormat); + } + + public TextureStorage CreateAliasedStorageUnsafe(GAL.Format format) + { + if (_aliasedStorages == null || !_aliasedStorages.TryGetValue(format, out var storage)) + { + _aliasedStorages ??= new Dictionary(); + + var info = NewCreateInfoWith(ref _info, format, _info.BytesPerPixel); + + storage = new TextureStorage(_gd, default, _device, info, ScaleFactor, _allocationAuto); + + _aliasedStorages.Add(format, storage); + } + + return storage; + } + + public static TextureCreateInfo NewCreateInfoWith(ref TextureCreateInfo info, GAL.Format format, int bytesPerPixel) + { + return NewCreateInfoWith(ref info, format, bytesPerPixel, info.Width, info.Height); + } + + public static TextureCreateInfo NewCreateInfoWith( + ref TextureCreateInfo info, + GAL.Format 
format, + int bytesPerPixel, + int width, + int height) + { + return new TextureCreateInfo( + width, + height, + info.Depth, + info.Levels, + info.Samples, + info.BlockWidth, + info.BlockHeight, + bytesPerPixel, + format, + info.DepthStencilMode, + info.Target, + info.SwizzleR, + info.SwizzleG, + info.SwizzleB, + info.SwizzleA); + } + + public Auto GetImage() + { + return _imageAuto; + } + + public Image GetImageForViewCreation() + { + return _image; + } + + public bool HasCommandBufferDependency(CommandBufferScoped cbs) + { + if (_foreignAllocationAuto != null) + { + return _foreignAllocationAuto.HasCommandBufferDependency(cbs); + } + else if (_allocationAuto != null) + { + return _allocationAuto.HasCommandBufferDependency(cbs); + } + + return false; + } + + private unsafe void InitialTransition(ImageLayout srcLayout, ImageLayout dstLayout) + { + CommandBufferScoped cbs; + bool useTempCbs = !_gd.CommandBufferPool.OwnedByCurrentThread; + + if (useTempCbs) + { + cbs = _gd.BackgroundResources.Get().GetPool().Rent(); + } + else + { + if (_gd.PipelineInternal != null) + { + cbs = _gd.PipelineInternal.GetPreloadCommandBuffer(); + } + else + { + cbs = _gd.CommandBufferPool.Rent(); + useTempCbs = true; + } + } + + var aspectFlags = _info.Format.ConvertAspectFlags(); + + var subresourceRange = new ImageSubresourceRange(aspectFlags, 0, (uint)_info.Levels, 0, (uint)_info.GetLayers()); + + var barrier = new ImageMemoryBarrier() + { + SType = StructureType.ImageMemoryBarrier, + SrcAccessMask = 0, + DstAccessMask = DefaultAccessMask, + OldLayout = srcLayout, + NewLayout = dstLayout, + SrcQueueFamilyIndex = Vk.QueueFamilyIgnored, + DstQueueFamilyIndex = Vk.QueueFamilyIgnored, + Image = _imageAuto.Get(cbs).Value, + SubresourceRange = subresourceRange + }; + + _gd.Api.CmdPipelineBarrier( + cbs.CommandBuffer, + PipelineStageFlags.PipelineStageTopOfPipeBit, + PipelineStageFlags.PipelineStageAllCommandsBit, + 0, + 0, + null, + 0, + null, + 1, + barrier); + + if (useTempCbs) + { + 
cbs.Dispose(); + } + } + + public static SampleCountFlags ConvertToSampleCountFlags(uint samples) + { + if (samples == 0 || samples > (uint)SampleCountFlags.SampleCount64Bit) + { + return SampleCountFlags.SampleCount1Bit; + } + + // Round down to the nearest power of two (keeps only the highest set bit). + return (SampleCountFlags)(1u << (31 - BitOperations.LeadingZeroCount(samples))); + } + + public TextureView CreateView(TextureCreateInfo info, int firstLayer, int firstLevel) + { + return new TextureView(_gd, _device, info, this, firstLayer, firstLevel); + } + + public void CopyFromOrToBuffer( + CommandBuffer commandBuffer, + VkBuffer buffer, + Image image, + int size, + bool to, + int x, + int y, + int dstLayer, + int dstLevel, + int dstLayers, + int dstLevels, + bool singleSlice, + ImageAspectFlags aspectFlags, + bool forFlush) + { + bool is3D = Info.Target == Target.Texture3D; + int width = Info.Width; + int height = Info.Height; + int depth = is3D && !singleSlice ? Info.Depth : 1; + int layer = is3D ? 0 : dstLayer; + int layers = dstLayers; + int levels = dstLevels; + + int offset = 0; + + for (int level = 0; level < levels; level++) + { + int mipSize = Info.GetMipSize(level); + + if (forFlush) + { + mipSize = GetBufferDataLength(mipSize); + } + + int endOffset = offset + mipSize; + + if ((uint)endOffset > (uint)size) + { + break; + } + + int rowLength = (Info.GetMipStride(level) / Info.BytesPerPixel) * Info.BlockWidth; + + var sl = new ImageSubresourceLayers( + aspectFlags, + (uint)(dstLevel + level), + (uint)layer, + (uint)layers); + + var extent = new Extent3D((uint)width, (uint)height, (uint)depth); + + int z = is3D ? 
dstLayer : 0; + + var region = new BufferImageCopy((ulong)offset, (uint)rowLength, (uint)height, sl, new Offset3D(x, y, z), extent); + + if (to) + { + _gd.Api.CmdCopyImageToBuffer(commandBuffer, image, ImageLayout.General, buffer, 1, region); + } + else + { + _gd.Api.CmdCopyBufferToImage(commandBuffer, buffer, image, ImageLayout.General, 1, region); + } + + offset += mipSize; + + width = Math.Max(1, width >> 1); + height = Math.Max(1, height >> 1); + + if (Info.Target == Target.Texture3D) + { + depth = Math.Max(1, depth >> 1); + } + } + } + + private int GetBufferDataLength(int length) + { + if (NeedsD24S8Conversion()) + { + return length * 2; + } + + return length; + } + + private bool NeedsD24S8Conversion() + { + return FormatCapabilities.IsD24S8(Info.Format) && VkFormat == VkFormat.D32SfloatS8Uint; + } + + public void SetModification(AccessFlags accessFlags, PipelineStageFlags stage) + { + _lastModificationAccess = accessFlags; + _lastModificationStage = stage; + } + + public void InsertBarrier(CommandBufferScoped cbs, AccessFlags dstAccessFlags, PipelineStageFlags dstStageFlags) + { + if (_lastModificationAccess != AccessFlags.AccessNoneKhr) + { + ImageAspectFlags aspectFlags; + + if (_info.Format.IsDepthOrStencil()) + { + if (_info.Format == GAL.Format.S8Uint) + { + aspectFlags = ImageAspectFlags.ImageAspectStencilBit; + } + else if (_info.Format == GAL.Format.D16Unorm || _info.Format == GAL.Format.D32Float) + { + aspectFlags = ImageAspectFlags.ImageAspectDepthBit; + } + else + { + aspectFlags = ImageAspectFlags.ImageAspectDepthBit | ImageAspectFlags.ImageAspectStencilBit; + } + } + else + { + aspectFlags = ImageAspectFlags.ImageAspectColorBit; + } + + TextureView.InsertImageBarrier( + _gd.Api, + cbs.CommandBuffer, + _imageAuto.Get(cbs).Value, + _lastModificationAccess, + dstAccessFlags, + _lastModificationStage, + dstStageFlags, + aspectFlags, + 0, + 0, + _info.GetLayers(), + _info.Levels); + + _lastModificationAccess = AccessFlags.AccessNoneKhr; + } + } + + 
public void Dispose() + { + if (_aliasedStorages != null) + { + foreach (var storage in _aliasedStorages.Values) + { + storage.Dispose(); + } + + _aliasedStorages.Clear(); + } + + _imageAuto.Dispose(); + _allocationAuto?.Dispose(); + _foreignAllocationAuto?.DecrementReferenceCount(); + _foreignAllocationAuto = null; + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/TextureView.cs b/Ryujinx.Graphics.Vulkan/TextureView.cs new file mode 100644 index 000000000..42786a2c8 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/TextureView.cs @@ -0,0 +1,1124 @@ +using Ryujinx.Graphics.GAL; +using Silk.NET.Vulkan; +using System; +using System.Collections.Generic; +using VkBuffer = Silk.NET.Vulkan.Buffer; +using VkFormat = Silk.NET.Vulkan.Format; + +namespace Ryujinx.Graphics.Vulkan +{ + class TextureView : ITexture, IDisposable + { + private readonly VulkanGraphicsDevice _gd; + + private readonly Device _device; + + private readonly Auto _imageView; + private readonly Auto _imageViewIdentity; + private readonly Auto _imageView2dArray; + private BufferHolder _flushStorage; + private Dictionary _selfManagedViews; + + private TextureCreateInfo _info; + + public TextureCreateInfo Info => _info; + + public TextureStorage Storage { get; } + + public int Width => Info.Width; + public int Height => Info.Height; + public int Layers => Info.GetDepthOrLayers(); + public int FirstLayer { get; } + public int FirstLevel { get; } + public float ScaleFactor => Storage.ScaleFactor; + public VkFormat VkFormat { get; } + public bool Valid { get; private set; } + + public TextureView( + VulkanGraphicsDevice gd, + Device device, + TextureCreateInfo info, + TextureStorage storage, + int firstLayer, + int firstLevel) + { + _gd = gd; + _device = device; + _info = info; + Storage = storage; + FirstLayer = firstLayer; + FirstLevel = firstLevel; + + gd.Textures.Add(this); + + var format = _gd.FormatCapabilities.ConvertToVkFormat(info.Format); + var levels = (uint)info.Levels; + var layers = 
(uint)info.GetLayers(); + + VkFormat = format; + + var type = info.Target.ConvertView(); + + var swizzleR = info.SwizzleR.Convert(); + var swizzleG = info.SwizzleG.Convert(); + var swizzleB = info.SwizzleB.Convert(); + var swizzleA = info.SwizzleA.Convert(); + + if (info.Format == GAL.Format.R5G5B5A1Unorm || + info.Format == GAL.Format.R5G5B5X1Unorm || + info.Format == GAL.Format.R5G6B5Unorm) + { + var temp = swizzleR; + + swizzleR = swizzleB; + swizzleB = temp; + } + else if (info.Format == GAL.Format.R4G4B4A4Unorm) + { + var tempG = swizzleG; + var tempB = swizzleB; + + swizzleB = swizzleA; + swizzleG = swizzleR; + swizzleR = tempG; + swizzleA = tempB; + } + else if (info.Format == GAL.Format.A1B5G5R5Unorm) + { + var tempB = swizzleB; + var tempA = swizzleA; + + swizzleB = swizzleG; + swizzleA = swizzleR; + swizzleR = tempA; + swizzleG = tempB; + } + + var componentMapping = new ComponentMapping(swizzleR, swizzleG, swizzleB, swizzleA); + + var aspectFlags = info.Format.ConvertAspectFlags(info.DepthStencilMode); + var aspectFlagsDepth = info.Format.ConvertAspectFlags(DepthStencilMode.Depth); + + var subresourceRange = new ImageSubresourceRange(aspectFlags, (uint)firstLevel, levels, (uint)firstLayer, layers); + var subresourceRangeDepth = new ImageSubresourceRange(aspectFlagsDepth, (uint)firstLevel, levels, (uint)firstLayer, layers); + + unsafe Auto CreateImageView(ComponentMapping cm, ImageSubresourceRange sr, ImageViewType viewType) + { + var imageCreateInfo = new ImageViewCreateInfo() + { + SType = StructureType.ImageViewCreateInfo, + Image = storage.GetImageForViewCreation(), + ViewType = viewType, + Format = format, + Components = cm, + SubresourceRange = sr + }; + + gd.Api.CreateImageView(device, imageCreateInfo, null, out var imageView).ThrowOnError(); + return new Auto(new DisposableImageView(gd.Api, device, imageView), null, storage.GetImage()); + } + + _imageView = CreateImageView(componentMapping, subresourceRange, type); + + // Framebuffer attachments 
and storage images requires a identity component mapping. + var identityComponentMapping = new ComponentMapping( + ComponentSwizzle.R, + ComponentSwizzle.G, + ComponentSwizzle.B, + ComponentSwizzle.A); + + _imageViewIdentity = CreateImageView(identityComponentMapping, subresourceRangeDepth, type); + + // Framebuffer attachments also requires 3D textures to be bound as 2D array. + if (info.Target == Target.Texture3D) + { + subresourceRange = new ImageSubresourceRange(aspectFlags, (uint)firstLevel, levels, (uint)firstLayer, (uint)info.Depth); + + _imageView2dArray = CreateImageView(identityComponentMapping, subresourceRange, ImageViewType.ImageViewType2DArray); + } + + Valid = true; + } + + public Auto GetImage() + { + return Storage.GetImage(); + } + + public Auto GetImageView() + { + return _imageView; + } + + public Auto GetIdentityImageView() + { + return _imageViewIdentity; + } + + public Auto GetImageViewForAttachment() + { + return _imageView2dArray ?? _imageViewIdentity; + } + + public void CopyTo(ITexture destination, int firstLayer, int firstLevel) + { + var src = this; + var dst = (TextureView)destination; + + if (!Valid || !dst.Valid) + { + return; + } + + _gd.PipelineInternal.EndRenderPass(); + + var cbs = _gd.PipelineInternal.CurrentCommandBuffer; + + var srcImage = src.GetImage().Get(cbs).Value; + var dstImage = dst.GetImage().Get(cbs).Value; + + if (src.Info.Target.IsMultisample()) + { + int depth = Math.Min(src.Info.Depth, dst.Info.Depth - firstLayer); + int levels = Math.Min(src.Info.Levels, dst.Info.Levels - firstLevel); + + CopyMSToNonMS(_gd, cbs, src, dst, srcImage, dstImage, 0, firstLayer, 0, firstLevel, depth, levels); + } + else + { + TextureCopy.Copy( + _gd.Api, + cbs.CommandBuffer, + srcImage, + dstImage, + src.Info, + dst.Info, + src.FirstLayer, + dst.FirstLayer, + src.FirstLevel, + dst.FirstLevel, + 0, + firstLayer, + 0, + firstLevel); + } + } + + public void CopyTo(ITexture destination, int srcLayer, int dstLayer, int srcLevel, int 
dstLevel) + { + var src = this; + var dst = (TextureView)destination; + + if (!Valid || !dst.Valid) + { + return; + } + + _gd.PipelineInternal.EndRenderPass(); + + var cbs = _gd.PipelineInternal.CurrentCommandBuffer; + + var srcImage = src.GetImage().Get(cbs).Value; + var dstImage = dst.GetImage().Get(cbs).Value; + + if (src.Info.Target.IsMultisample()) + { + CopyMSToNonMS(_gd, cbs, src, dst, srcImage, dstImage, srcLayer, dstLayer, srcLevel, dstLevel, 1, 1); + } + else + { + TextureCopy.Copy( + _gd.Api, + cbs.CommandBuffer, + srcImage, + dstImage, + src.Info, + dst.Info, + src.FirstLayer, + dst.FirstLayer, + src.FirstLevel, + dst.FirstLevel, + srcLayer, + dstLayer, + srcLevel, + dstLevel, + 1, + 1); + } + } + + private static void CopyMSToNonMS( + VulkanGraphicsDevice gd, + CommandBufferScoped cbs, + TextureView src, + TextureView dst, + Image srcImage, + Image dstImage, + int srcLayer, + int dstLayer, + int srcLevel, + int dstLevel, + int layers, + int levels) + { + bool differentFormats = src.Info.Format != dst.Info.Format; + + var target = src.Info.Target switch + { + Target.Texture2D => Target.Texture2DMultisample, + Target.Texture2DArray => Target.Texture2DMultisampleArray, + Target.Texture2DMultisampleArray => Target.Texture2DArray, + _ => Target.Texture2D + }; + + var intermmediateTarget = differentFormats ? dst.Info.Target : target; + using var intermmediate = CreateIntermmediateTexture(gd, src, ref dst._info, intermmediateTarget, layers, levels); + var intermmediateImage = intermmediate.GetImage().Get(cbs).Value; + + if (differentFormats) + { + // If the formats are different, the resolve would perform format conversion. + // So we need yet another intermmediate texture and do a copy to reinterpret the + // data into the correct (destination) format, without doing any sort of conversion. 
+                using var intermmediate2 = CreateIntermmediateTexture(gd, src, ref src._info, target, layers, levels);
+                var intermmediate2Image = intermmediate2.GetImage().Get(cbs).Value;
+
+                TextureCopy.Copy(
+                    gd.Api,
+                    cbs.CommandBuffer,
+                    srcImage,
+                    intermmediate2Image,
+                    src.Info,
+                    intermmediate2.Info,
+                    src.FirstLayer,
+                    0,
+                    src.FirstLevel,
+                    0,
+                    srcLayer,
+                    0,
+                    srcLevel,
+                    0,
+                    layers,
+                    levels);
+
+                TextureCopy.Copy(
+                    gd.Api,
+                    cbs.CommandBuffer,
+                    intermmediate2Image,
+                    intermmediateImage,
+                    intermmediate2.Info,
+                    intermmediate.Info,
+                    0,
+                    0,
+                    0,
+                    0,
+                    0,
+                    0,
+                    0,
+                    0,
+                    layers,
+                    levels);
+            }
+            else
+            {
+                TextureCopy.Copy(
+                    gd.Api,
+                    cbs.CommandBuffer,
+                    srcImage,
+                    intermmediateImage,
+                    src.Info,
+                    intermmediate.Info,
+                    src.FirstLayer,
+                    0,
+                    src.FirstLevel,
+                    0,
+                    srcLayer,
+                    0,
+                    srcLevel,
+                    0,
+                    layers,
+                    levels);
+            }
+
+            var srcRegion = new Extents2D(0, 0, src.Width, src.Height);
+            var dstRegion = new Extents2D(0, 0, dst.Width, dst.Height);
+
+            // TextureCopy.Blit takes (srcLayer, dstLayer, srcLevel, dstLevel): both layers
+            // first, then both levels. The intermediate texture starts at layer 0 / level 0,
+            // while the destination is offset by its view base plus the requested layer/level.
+            TextureCopy.Blit(
+                gd.Api,
+                cbs.CommandBuffer,
+                intermmediateImage,
+                dstImage,
+                intermmediate.Info,
+                dst.Info,
+                srcRegion,
+                dstRegion,
+                0,
+                dst.FirstLayer + dstLayer,
+                0,
+                dst.FirstLevel + dstLevel,
+                layers,
+                levels,
+                true,
+                ImageAspectFlags.ImageAspectColorBit,
+                ImageAspectFlags.ImageAspectColorBit);
+        }
+
+        /// <summary>
+        /// Creates a temporary single-sample texture view with the width and height of
+        /// <paramref name="src"/>, used as a staging target by CopyMSToNonMS.
+        /// </summary>
+        /// <param name="gd">Graphics device used to create the texture view.</param>
+        /// <param name="src">View whose width and height the new texture copies.</param>
+        /// <param name="formatInfo">Supplies the block size, bytes per pixel and format of the new texture.</param>
+        /// <param name="target">Texture target of the new texture.</param>
+        /// <param name="depth">Value passed as the depth of the new texture.</param>
+        /// <param name="levels">Number of mip levels of the new texture.</param>
+        /// <returns>The newly created view; callers in this file dispose it with <c>using</c>.</returns>
+        private static TextureView CreateIntermmediateTexture(VulkanGraphicsDevice gd, TextureView src, ref TextureCreateInfo formatInfo, Target target, int depth, int levels)
+        {
+            // Samples is fixed to 1 and the swizzle is identity; 1f is passed as the second
+            // argument of CreateTextureView (presumably the scale factor - confirm).
+            return gd.CreateTextureView(new GAL.TextureCreateInfo(
+                src.Width,
+                src.Height,
+                depth,
+                levels,
+                1,
+                formatInfo.BlockWidth,
+                formatInfo.BlockHeight,
+                formatInfo.BytesPerPixel,
+                formatInfo.Format,
+                DepthStencilMode.Depth,
+                target,
+                SwizzleComponent.Red,
+                SwizzleComponent.Green,
+                SwizzleComponent.Blue,
+                SwizzleComponent.Alpha), 1f);
+        }
+
+        public void CopyTo(ITexture destination, Extents2D srcRegion, Extents2D dstRegion, bool linearFilter)
+        {
+            var dst = (TextureView)destination;
+
+            if 
(_gd.CommandBufferPool.OwnedByCurrentThread) + { + _gd.PipelineInternal.EndRenderPass(); + + var cbs = _gd.PipelineInternal.CurrentCommandBuffer; + + CopyToImpl(cbs, dst, srcRegion, dstRegion, linearFilter); + } + else + { + var cbp = _gd.BackgroundResources.Get().GetPool(); + + using var cbs = cbp.Rent(); + + CopyToImpl(cbs, dst, srcRegion, dstRegion, linearFilter); + } + } + + private void CopyToImpl(CommandBufferScoped cbs, TextureView dst, Extents2D srcRegion, Extents2D dstRegion, bool linearFilter) + { + var src = this; + + var srcFormat = GetCompatibleGalFormat(src.Info.Format); + var dstFormat = GetCompatibleGalFormat(dst.Info.Format); + + bool srcUsesStorageFormat = src.VkFormat == src.Storage.VkFormat; + bool dstUsesStorageFormat = dst.VkFormat == dst.Storage.VkFormat; + + int layers = Math.Min(dst.Info.GetDepthOrLayers(), src.Info.GetDepthOrLayers()); + int levels = Math.Min(dst.Info.Levels, src.Info.Levels); + + if (srcUsesStorageFormat && dstUsesStorageFormat) + { + if ((srcRegion.X1 | dstRegion.X1) == 0 && + (srcRegion.Y1 | dstRegion.Y1) == 0 && + srcRegion.X2 == src.Width && + srcRegion.Y2 == src.Height && + dstRegion.X2 == dst.Width && + dstRegion.Y2 == dst.Height && + src.Width == dst.Width && + src.Height == dst.Height && + src.VkFormat == dst.VkFormat) + { + TextureCopy.Copy( + _gd.Api, + cbs.CommandBuffer, + src.GetImage().Get(cbs).Value, + dst.GetImage().Get(cbs).Value, + src.Info, + dst.Info, + src.FirstLayer, + dst.FirstLayer, + src.FirstLevel, + dst.FirstLevel, + 0, + 0, + 0, + 0, + layers, + levels); + + return; + } + else if (_gd.FormatCapabilities.FormatSupports(FormatFeatureFlags.FormatFeatureBlitSrcBit, srcFormat) && + _gd.FormatCapabilities.FormatSupports(FormatFeatureFlags.FormatFeatureBlitDstBit, dstFormat)) + { + TextureCopy.Blit( + _gd.Api, + cbs.CommandBuffer, + src.GetImage().Get(cbs).Value, + dst.GetImage().Get(cbs).Value, + src.Info, + dst.Info, + srcRegion, + dstRegion, + src.FirstLayer, + dst.FirstLayer, + src.FirstLevel, + 
dst.FirstLevel,
+                        layers,
+                        levels,
+                        linearFilter);
+
+                    return;
+                }
+                else if (srcFormat == GAL.Format.D32FloatS8Uint && srcFormat == dstFormat && SupportsBlitFromD32FS8ToD32FAndS8())
+                {
+                    BlitDepthStencilWithBuffer(_gd, cbs, src, dst, srcRegion, dstRegion);
+
+                    return;
+                }
+            }
+
+            if (VulkanConfiguration.UseSlowSafeBlitOnAmd &&
+                _gd.Vendor == Vendor.Amd &&
+                src.Info.Target == Target.Texture2D &&
+                dst.Info.Target == Target.Texture2D &&
+                !dst.Info.Format.IsDepthOrStencil())
+            {
+                _gd.HelperShader.Blit(
+                    _gd,
+                    src,
+                    dst.GetIdentityImageView(),
+                    dst.Width,
+                    dst.Height,
+                    dst.VkFormat,
+                    srcRegion,
+                    dstRegion,
+                    linearFilter);
+
+                return;
+            }
+
+            // Fallback: blit through storages that alias the same memory with a
+            // blit-compatible format (a color format when the destination is depth/stencil).
+            Auto srcImage;
+            Auto dstImage;
+
+            if (dst.Info.Format.IsDepthOrStencil())
+            {
+                srcImage = src.Storage.CreateAliasedColorForDepthStorageUnsafe(srcFormat).GetImage();
+                dstImage = dst.Storage.CreateAliasedColorForDepthStorageUnsafe(dstFormat).GetImage();
+            }
+            else
+            {
+                srcImage = src.Storage.CreateAliasedStorageUnsafe(srcFormat).GetImage();
+                dstImage = dst.Storage.CreateAliasedStorageUnsafe(dstFormat).GetImage();
+            }
+
+            // TextureCopy.Blit takes (srcLayer, dstLayer, srcLevel, dstLevel): both layers
+            // first, then both levels - the same order as the direct blit path above.
+            TextureCopy.Blit(
+                _gd.Api,
+                cbs.CommandBuffer,
+                srcImage.Get(cbs).Value,
+                dstImage.Get(cbs).Value,
+                src.Info,
+                dst.Info,
+                srcRegion,
+                dstRegion,
+                src.FirstLayer,
+                dst.FirstLayer,
+                src.FirstLevel,
+                dst.FirstLevel,
+                layers,
+                levels,
+                linearFilter,
+                ImageAspectFlags.ImageAspectColorBit,
+                ImageAspectFlags.ImageAspectColorBit);
+        }
+
+        // Blits a depth-stencil texture one aspect at a time, using temporary D32Float and
+        // S8Uint storages and intermediate buffers for the depth and stencil data.
+        private static void BlitDepthStencilWithBuffer(
+            VulkanGraphicsDevice gd,
+            CommandBufferScoped cbs,
+            TextureView src,
+            TextureView dst,
+            Extents2D srcRegion,
+            Extents2D dstRegion)
+        {
+            // Normalize the destination region: base corner and size are computed so that
+            // flipped regions (X2 < X1 or Y2 < Y1) still yield a positive width/height.
+            int drBaseX = Math.Min(dstRegion.X1, dstRegion.X2);
+            int drBaseY = Math.Min(dstRegion.Y1, dstRegion.Y2);
+            int drWidth = Math.Abs(dstRegion.X2 - dstRegion.X1);
+            int drHeight = Math.Abs(dstRegion.Y2 - dstRegion.Y1);
+
+            // Same region translated so that its minimum corner sits at the origin.
+            var drOriginZero = new Extents2D(
+                dstRegion.X1 - drBaseX,
+                dstRegion.Y1 - drBaseY,
+                dstRegion.X2 - drBaseX,
+                dstRegion.Y2 - drBaseY);
+
+            
var d32SrcStorageInfo = TextureStorage.NewCreateInfoWith(ref src._info, GAL.Format.D32Float, 4); + var d32DstStorageInfo = TextureStorage.NewCreateInfoWith(ref dst._info, GAL.Format.D32Float, 4, drWidth, drHeight); + var s8SrcStorageInfo = TextureStorage.NewCreateInfoWith(ref src._info, GAL.Format.S8Uint, 1); + var s8DstStorageInfo = TextureStorage.NewCreateInfoWith(ref dst._info, GAL.Format.S8Uint, 1, drWidth, drHeight); + + using var d32SrcStorage = gd.CreateTextureStorage(d32SrcStorageInfo, src.Storage.ScaleFactor); + using var d32DstStorage = gd.CreateTextureStorage(d32DstStorageInfo, dst.Storage.ScaleFactor); + using var s8SrcStorage = gd.CreateTextureStorage(s8SrcStorageInfo, src.Storage.ScaleFactor); + using var s8DstStorage = gd.CreateTextureStorage(s8DstStorageInfo, dst.Storage.ScaleFactor); + + void SlowBlit(TextureStorage srcTemp, TextureStorage dstTemp, ImageAspectFlags aspectFlags) + { + int levels = Math.Min(src.Info.Levels, dst.Info.Levels); + + int srcSize = 0; + int dstSize = 0; + + for (int l = 0; l < levels; l++) + { + srcSize += srcTemp.Info.GetMipSize2D(l); + dstSize += dstTemp.Info.GetMipSize2D(l); + } + + using var srcTempBuffer = gd.BufferManager.Create(gd, srcSize, deviceLocal: true); + using var dstTempBuffer = gd.BufferManager.Create(gd, dstSize, deviceLocal: true); + + src.Storage.CopyFromOrToBuffer( + cbs.CommandBuffer, + srcTempBuffer.GetBuffer().Get(cbs, 0, srcSize).Value, + src.GetImage().Get(cbs).Value, + srcSize, + to: true, + 0, + 0, + src.FirstLayer, + src.FirstLevel, + 1, + levels, + true, + aspectFlags, + false); + + BufferHolder.InsertBufferBarrier( + gd, + cbs.CommandBuffer, + srcTempBuffer.GetBuffer().Get(cbs, 0, srcSize).Value, + AccessFlags.AccessTransferWriteBit, + AccessFlags.AccessTransferReadBit, + PipelineStageFlags.PipelineStageTransferBit, + PipelineStageFlags.PipelineStageTransferBit, + 0, + srcSize); + + srcTemp.CopyFromOrToBuffer( + cbs.CommandBuffer, + srcTempBuffer.GetBuffer().Get(cbs, 0, srcSize).Value, + 
srcTemp.GetImage().Get(cbs).Value, + srcSize, + to: false, + 0, + 0, + 0, + 0, + 1, + levels, + true, + aspectFlags, + false); + + InsertImageBarrier( + gd.Api, + cbs.CommandBuffer, + srcTemp.GetImage().Get(cbs).Value, + AccessFlags.AccessTransferWriteBit, + AccessFlags.AccessTransferReadBit, + PipelineStageFlags.PipelineStageTransferBit, + PipelineStageFlags.PipelineStageTransferBit, + aspectFlags, + 0, + 0, + 1, + levels); + + TextureCopy.Blit( + gd.Api, + cbs.CommandBuffer, + srcTemp.GetImage().Get(cbs).Value, + dstTemp.GetImage().Get(cbs).Value, + srcTemp.Info, + dstTemp.Info, + srcRegion, + drOriginZero, + 0, + 0, + 0, + 0, + 1, + levels, + false, + aspectFlags, + aspectFlags); + + InsertImageBarrier( + gd.Api, + cbs.CommandBuffer, + dstTemp.GetImage().Get(cbs).Value, + AccessFlags.AccessTransferWriteBit, + AccessFlags.AccessTransferReadBit, + PipelineStageFlags.PipelineStageTransferBit, + PipelineStageFlags.PipelineStageTransferBit, + aspectFlags, + 0, + 0, + 1, + levels); + + dstTemp.CopyFromOrToBuffer( + cbs.CommandBuffer, + dstTempBuffer.GetBuffer().Get(cbs, 0, dstSize).Value, + dstTemp.GetImage().Get(cbs).Value, + dstSize, + to: true, + 0, + 0, + 0, + 0, + 1, + levels, + true, + aspectFlags, + false); + + BufferHolder.InsertBufferBarrier( + gd, + cbs.CommandBuffer, + dstTempBuffer.GetBuffer().Get(cbs, 0, dstSize).Value, + AccessFlags.AccessTransferWriteBit, + AccessFlags.AccessTransferReadBit, + PipelineStageFlags.PipelineStageTransferBit, + PipelineStageFlags.PipelineStageTransferBit, + 0, + dstSize); + + dst.Storage.CopyFromOrToBuffer( + cbs.CommandBuffer, + dstTempBuffer.GetBuffer().Get(cbs, 0, dstSize).Value, + dst.GetImage().Get(cbs).Value, + dstSize, + to: false, + drBaseX, + drBaseY, + dst.FirstLayer, + dst.FirstLevel, + 1, + levels, + true, + aspectFlags, + false); + } + + SlowBlit(d32SrcStorage, d32DstStorage, ImageAspectFlags.ImageAspectDepthBit); + SlowBlit(s8SrcStorage, s8DstStorage, ImageAspectFlags.ImageAspectStencilBit); + } + + public 
/// <summary>
/// Gets a view of this texture that uses the given format.
/// Views are created on demand and cached, so repeated requests for the same format
/// return the same instance. The cached views are owned by this texture.
/// </summary>
/// <param name="format">Format of the requested view</param>
/// <returns>This texture when the format already matches, otherwise a cached or newly created view</returns>
public TextureView GetView(GAL.Format format)
{
    if (format == Info.Format)
    {
        return this;
    }

    if (_selfManagedViews != null && _selfManagedViews.TryGetValue(format, out var view))
    {
        return view;
    }

    // Same description as this texture, only the format differs.
    view = CreateViewImpl(new TextureCreateInfo(
        Info.Width,
        Info.Height,
        Info.Depth,
        Info.Levels,
        Info.Samples,
        Info.BlockWidth,
        Info.BlockHeight,
        Info.BytesPerPixel,
        format,
        Info.DepthStencilMode,
        Info.Target,
        Info.SwizzleR,
        Info.SwizzleG,
        Info.SwizzleB,
        Info.SwizzleA), 0, 0);

    // The cache is allocated lazily. It is keyed by format and stores the created view,
    // so the dictionary needs explicit type arguments (there is no non-generic Dictionary).
    (_selfManagedViews ??= new Dictionary<GAL.Format, TextureView>()).Add(format, view);

    return view;
}
/// <summary>
/// Reads back a rectangular region of this texture into a new byte array.
/// The copy is recorded on a rented command buffer, and the method waits on the
/// buffer's fences before reading the data.
/// </summary>
/// <param name="x">X offset of the region</param>
/// <param name="y">Y offset of the region</param>
/// <param name="width">Width of the region</param>
/// <param name="height">Height of the region</param>
/// <returns>Array of <c>width * height * Info.BytesPerPixel</c> bytes with the region data</returns>
public byte[] GetData(int x, int y, int width, int height)
{
    int size = width * height * Info.BytesPerPixel;
    using var bufferHolder = _gd.BufferManager.Create(_gd, size);

    // The command buffer scope is disposed at the end of this block, before the fence wait.
    using (var cbs = _gd.CommandBufferPool.Rent())
    {
        var buffer = bufferHolder.GetBuffer(cbs.CommandBuffer).Get(cbs).Value;
        var image = GetImage().Get(cbs).Value;

        CopyFromOrToBuffer(cbs.CommandBuffer, buffer, image, size, true, x, y, width, height);
    }

    bufferHolder.WaitForFences();
    byte[] bitmap = new byte[size];

    // An empty output span asks GetDataFromBuffer to allocate its own conversion buffer when
    // one is needed. The span is a span of bytes, so the element type must be spelled out.
    GetDataFromBuffer(bufferHolder.GetDataStorage(0, size), size, Span<byte>.Empty).CopyTo(bitmap);
    return bitmap;
}
/// <summary>
/// Reads back a single layer and mip level of this texture through a flush buffer.
/// </summary>
/// <param name="cbp">Command buffer pool used to record the read back</param>
/// <param name="flushBuffer">Flush buffer that receives the texture data</param>
/// <param name="layer">Layer to read</param>
/// <param name="level">Mip level to read</param>
/// <returns>The texture data, converted in place when a D24S8 conversion is needed</returns>
private ReadOnlySpan<byte> GetData(CommandBufferPool cbp, PersistentFlushBuffer flushBuffer, int layer, int level)
{
    // The buffer length accounts for the larger host format when a D24S8 conversion is needed.
    int size = GetBufferDataLength(Info.GetMipSize(level));

    Span<byte> result = flushBuffer.GetTextureData(cbp, this, size, layer, level);

    // The same span is passed as input and output, so any conversion happens in place.
    return GetDataFromBuffer(result, size, result);
}
/// <summary>
/// Records one buffer/image copy per mip level, either from the image into the buffer
/// or from the buffer into the image. Mip levels are packed back to back in the buffer;
/// recording stops at the first level that would not fit within <paramref name="size"/>.
/// </summary>
/// <param name="commandBuffer">Command buffer to record the copies on</param>
/// <param name="buffer">Buffer to copy from or to</param>
/// <param name="image">Image to copy from or to</param>
/// <param name="size">Number of bytes available in the buffer</param>
/// <param name="to">True to copy image to buffer, false to copy buffer to image</param>
/// <param name="dstLayer">First layer, or the Z offset for 3D textures</param>
/// <param name="dstLevel">First mip level, relative to this view</param>
/// <param name="dstLayers">Number of layers to copy</param>
/// <param name="dstLevels">Number of mip levels to copy</param>
/// <param name="singleSlice">True to copy a single depth slice of a 3D texture</param>
public void CopyFromOrToBuffer(
    CommandBuffer commandBuffer,
    VkBuffer buffer,
    Image image,
    int size,
    bool to,
    int dstLayer,
    int dstLevel,
    int dstLayers,
    int dstLevels,
    bool singleSlice)
{
    bool is3D = Info.Target == Target.Texture3D;

    int mipWidth = Math.Max(1, Info.Width >> dstLevel);
    int mipHeight = Math.Max(1, Info.Height >> dstLevel);
    int mipDepth = 1;

    if (is3D && !singleSlice)
    {
        mipDepth = Math.Max(1, Info.Depth >> dstLevel);
    }

    // For 3D textures the "layer" selects a depth slice (Z offset) rather than an array layer.
    int baseLayer = is3D ? 0 : dstLayer;
    int zOffset = is3D ? dstLayer : 0;

    int bufferOffset = 0;

    for (int i = 0; i < dstLevels; i++)
    {
        int mipLevel = dstLevel + i;
        int mipSize = GetBufferDataLength(Info.GetMipSize(mipLevel));

        // Stop once the next level would run past the end of the buffer.
        if ((uint)(bufferOffset + mipSize) > (uint)size)
        {
            break;
        }

        int rowLength = (Info.GetMipStride(mipLevel) / Info.BytesPerPixel) * Info.BlockWidth;

        var aspect = Info.Format.ConvertAspectFlags();

        // Only one aspect is selected for the copy; combined depth-stencil uses depth.
        if (aspect == (ImageAspectFlags.ImageAspectDepthBit | ImageAspectFlags.ImageAspectStencilBit))
        {
            aspect = ImageAspectFlags.ImageAspectDepthBit;
        }

        var subresource = new ImageSubresourceLayers(
            aspect,
            (uint)(FirstLevel + mipLevel),
            (uint)(FirstLayer + baseLayer),
            (uint)dstLayers);

        var region = new BufferImageCopy(
            (ulong)bufferOffset,
            (uint)rowLength,
            (uint)mipHeight,
            subresource,
            new Offset3D(0, 0, zOffset),
            new Extent3D((uint)mipWidth, (uint)mipHeight, (uint)mipDepth));

        if (to)
        {
            _gd.Api.CmdCopyImageToBuffer(commandBuffer, image, ImageLayout.General, buffer, 1, region);
        }
        else
        {
            _gd.Api.CmdCopyBufferToImage(commandBuffer, buffer, image, ImageLayout.General, 1, region);
        }

        bufferOffset += mipSize;

        // Each following mip level halves the dimensions, down to a minimum of 1.
        mipWidth = Math.Max(1, mipWidth >> 1);
        mipHeight = Math.Max(1, mipHeight >> 1);

        if (is3D)
        {
            mipDepth = Math.Max(1, mipDepth >> 1);
        }
    }
}
namespace Ryujinx.Graphics.Vulkan
{
    /// <summary>
    /// GPU vendors that the backend distinguishes between.
    /// </summary>
    enum Vendor
    {
        Amd,
        Intel,
        Nvidia,
        Qualcomm,
        Unknown
    }

    /// <summary>
    /// Helpers to translate vendor IDs into <see cref="Vendor"/> values and display names.
    /// </summary>
    static class VendorUtils
    {
        /// <summary>
        /// Maps a vendor ID to the matching <see cref="Vendor"/> value.
        /// </summary>
        /// <param name="id">Vendor ID reported for the physical device</param>
        /// <returns>The matching vendor, or <see cref="Vendor.Unknown"/> for any other ID</returns>
        public static Vendor FromId(uint id)
        {
            switch (id)
            {
                case 0x1002:
                    return Vendor.Amd;
                case 0x10DE:
                    return Vendor.Nvidia;
                case 0x8086:
                    return Vendor.Intel;
                case 0x5143:
                    return Vendor.Qualcomm;
                default:
                    return Vendor.Unknown;
            }
        }

        /// <summary>
        /// Gets a display name for a vendor ID.
        /// </summary>
        /// <param name="id">Vendor ID reported for the physical device</param>
        /// <returns>The vendor name, or the ID formatted as hexadecimal when it is not recognized</returns>
        public static string GetNameFromId(uint id)
        {
            switch (id)
            {
                case 0x1002:
                    return "AMD";
                case 0x1010:
                    return "ImgTec";
                case 0x10DE:
                    return "NVIDIA";
                case 0x13B5:
                    return "ARM";
                case 0x1AE0:
                    return "Google";
                case 0x5143:
                    return "Qualcomm";
                case 0x8086:
                    return "Intel";
                case 0x10001:
                    return "Vivante";
                case 0x10002:
                    return "VeriSilicon";
                case 0x10003:
                    return "Kazan";
                case 0x10004:
                    return "Codeplay Software Ltd.";
                case 0x10005:
                    return "Mesa";
                case 0x10006:
                    return "PoCL";
                default:
                    return $"0x{id:X}";
            }
        }
    }
}
namespace Ryujinx.Graphics.Vulkan
{
    /// <summary>
    /// Extension methods for Vulkan <see cref="Result"/> codes.
    /// </summary>
    static class ResultExtensions
    {
        /// <summary>
        /// Throws a <see cref="VulkanException"/> for any result other than <see cref="Result.Success"/>.
        /// </summary>
        /// <param name="result">Result code returned by a Vulkan call</param>
        public static void ThrowOnError(this Result result)
        {
            if (result == Result.Success)
            {
                return;
            }

            throw new VulkanException(result);
        }
    }

    /// <summary>
    /// Exception raised when a Vulkan API call fails.
    /// </summary>
    class VulkanException : Exception
    {
        /// <summary>
        /// Creates an exception with the default message.
        /// </summary>
        public VulkanException()
        {
        }

        /// <summary>
        /// Creates an exception with a custom message.
        /// </summary>
        /// <param name="message">Message that describes the error</param>
        public VulkanException(string message) : base(message)
        {
        }

        /// <summary>
        /// Creates an exception with a custom message and the exception that caused it.
        /// </summary>
        /// <param name="message">Message that describes the error</param>
        /// <param name="innerException">Exception that caused this one</param>
        public VulkanException(string message, Exception innerException) : base(message, innerException)
        {
        }

        /// <summary>
        /// Creates an exception whose message names the failing result code.
        /// </summary>
        /// <param name="result">Result code returned by the failing Vulkan call</param>
        public VulkanException(Result result) : base($"Unexpected API error \"{result}\".")
        {
        }

        /// <summary>
        /// Creates an exception from serialized data.
        /// </summary>
        /// <param name="info">Serialized object data</param>
        /// <param name="context">Serialization source or destination context</param>
        protected VulkanException(SerializationInfo info, StreamingContext context) : base(info, context)
        {
        }
    }
}
+using Silk.NET.Vulkan.Extensions.KHR; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Vulkan +{ + public sealed class VulkanGraphicsDevice : IRenderer + { + private Instance _instance; + private SurfaceKHR _surface; + private PhysicalDevice _physicalDevice; + private Device _device; + private uint _queueFamilyIndex; + private WindowBase _window; + + internal FormatCapabilities FormatCapabilities { get; private set; } + internal HardwareCapabilities Capabilities; + + internal Vk Api { get; private set; } + internal KhrSurface SurfaceApi { get; private set; } + internal KhrSwapchain SwapchainApi { get; private set; } + internal ExtConditionalRendering ConditionalRenderingApi { get; private set; } + internal ExtExtendedDynamicState ExtendedDynamicStateApi { get; private set; } + internal KhrPushDescriptor PushDescriptorApi { get; private set; } + internal ExtTransformFeedback TransformFeedbackApi { get; private set; } + internal KhrDrawIndirectCount DrawIndirectCountApi { get; private set; } + internal ExtDebugReport DebugReportApi { get; private set; } + + internal bool SupportsIndexTypeUint8 { get; private set; } + internal bool SupportsCustomBorderColor { get; private set; } + internal bool SupportsIndirectParameters { get; private set; } + internal bool SupportsFragmentShaderInterlock { get; private set; } + internal bool SupportsGeometryShaderPassthrough { get; private set; } + internal bool SupportsSubgroupSizeControl { get; private set; } + + internal uint QueueFamilyIndex { get; private set; } + public bool IsOffScreen { get; } + internal Queue Queue { get; private set; } + internal Queue BackgroundQueue { get; private set; } + internal object BackgroundQueueLock { get; private set; } + internal object QueueLock { get; private set; } + + internal MemoryAllocator MemoryAllocator { get; private set; } + internal CommandBufferPool CommandBufferPool { get; private set; } + 
internal DescriptorSetManager DescriptorSetManager { get; private set; } + internal PipelineLayoutCache PipelineLayoutCache { get; private set; } + internal BackgroundResources BackgroundResources { get; private set; } + + internal BufferManager BufferManager { get; private set; } + + internal HashSet Shaders { get; } + internal HashSet Textures { get; } + internal HashSet Samplers { get; } + + private Counters _counters; + private SyncManager _syncManager; + + private PipelineFull _pipeline; + private DebugReportCallbackEXT _debugReportCallback; + + internal HelperShader HelperShader { get; private set; } + internal PipelineFull PipelineInternal => _pipeline; + + public IPipeline Pipeline => _pipeline; + + public IWindow Window => _window; + + private readonly Func _getSurface; + private readonly Func _getRequiredExtensions; + private readonly string _preferredGpuId; + + internal Vendor Vendor { get; private set; } + internal bool IsAmdWindows { get; private set; } + internal bool IsIntelWindows { get; private set; } + public string GpuVendor { get; private set; } + public string GpuRenderer { get; private set; } + public string GpuVersion { get; private set; } + + public bool PreferThreading => true; + + public event EventHandler ScreenCaptured; + + public VulkanGraphicsDevice(Func surfaceFunc, Func requiredExtensionsFunc, string preferredGpuId) + { + _getSurface = surfaceFunc; + _getRequiredExtensions = requiredExtensionsFunc; + _preferredGpuId = preferredGpuId; + Shaders = new HashSet(); + Textures = new HashSet(); + Samplers = new HashSet(); + } + + public VulkanGraphicsDevice(Instance instance, Device device, PhysicalDevice physicalDevice, Queue queue, uint queueFamilyIndex, object lockObject) + { + _instance = instance; + _physicalDevice = physicalDevice; + _device = device; + _queueFamilyIndex = queueFamilyIndex; + + Queue = queue; + QueueLock = lockObject; + + IsOffScreen = true; + Shaders = new HashSet(); + Textures = new HashSet(); + Samplers = new 
HashSet(); + } + + private unsafe void LoadFeatures(string[] supportedExtensions, uint maxQueueCount, uint queueFamilyIndex) + { + FormatCapabilities = new FormatCapabilities(Api, _physicalDevice); + + var supportedFeatures = Api.GetPhysicalDeviceFeature(_physicalDevice); + SupportsIndexTypeUint8 = supportedExtensions.Contains("VK_EXT_index_type_uint8"); + SupportsCustomBorderColor = supportedExtensions.Contains("VK_EXT_custom_border_color"); + SupportsIndirectParameters = supportedExtensions.Contains(KhrDrawIndirectCount.ExtensionName); + SupportsFragmentShaderInterlock = supportedExtensions.Contains("VK_EXT_fragment_shader_interlock"); + SupportsGeometryShaderPassthrough = supportedExtensions.Contains("VK_NV_geometry_shader_passthrough"); + SupportsSubgroupSizeControl = supportedExtensions.Contains("VK_EXT_subgroup_size_control"); + + if (Api.TryGetDeviceExtension(_instance, _device, out ExtConditionalRendering conditionalRenderingApi)) + { + ConditionalRenderingApi = conditionalRenderingApi; + } + + if (Api.TryGetDeviceExtension(_instance, _device, out ExtExtendedDynamicState extendedDynamicStateApi)) + { + ExtendedDynamicStateApi = extendedDynamicStateApi; + } + + if (Api.TryGetDeviceExtension(_instance, _device, out KhrPushDescriptor pushDescriptorApi)) + { + PushDescriptorApi = pushDescriptorApi; + } + + if (Api.TryGetDeviceExtension(_instance, _device, out ExtTransformFeedback transformFeedbackApi)) + { + TransformFeedbackApi = transformFeedbackApi; + } + + if (Api.TryGetDeviceExtension(_instance, _device, out KhrDrawIndirectCount drawIndirectCountApi)) + { + DrawIndirectCountApi = drawIndirectCountApi; + } + + if (maxQueueCount >= 2) + { + Api.GetDeviceQueue(_device, queueFamilyIndex, 1, out var backgroundQueue); + BackgroundQueue = backgroundQueue; + BackgroundQueueLock = new object(); + } + + PhysicalDeviceProperties2 properties2 = new PhysicalDeviceProperties2() + { + SType = StructureType.PhysicalDeviceProperties2 + }; + + 
PhysicalDeviceSubgroupSizeControlPropertiesEXT propertiesSubgroupSizeControl = new PhysicalDeviceSubgroupSizeControlPropertiesEXT() + { + SType = StructureType.PhysicalDeviceSubgroupSizeControlPropertiesExt + }; + + if (SupportsSubgroupSizeControl) + { + properties2.PNext = &propertiesSubgroupSizeControl; + } + + bool supportsTransformFeedback = supportedExtensions.Contains(ExtTransformFeedback.ExtensionName); + + PhysicalDeviceTransformFeedbackPropertiesEXT propertiesTransformFeedback = new PhysicalDeviceTransformFeedbackPropertiesEXT() + { + SType = StructureType.PhysicalDeviceTransformFeedbackPropertiesExt + }; + + if (supportsTransformFeedback) + { + propertiesTransformFeedback.PNext = properties2.PNext; + properties2.PNext = &propertiesTransformFeedback; + } + + Api.GetPhysicalDeviceProperties2(_physicalDevice, &properties2); + + PhysicalDeviceFeatures2 features2 = new PhysicalDeviceFeatures2() + { + SType = StructureType.PhysicalDeviceFeatures2 + }; + + PhysicalDeviceRobustness2FeaturesEXT featuresRobustness2 = new PhysicalDeviceRobustness2FeaturesEXT() + { + SType = StructureType.PhysicalDeviceRobustness2FeaturesExt + }; + + if (supportedExtensions.Contains("VK_EXT_robustness2")) + { + features2.PNext = &featuresRobustness2; + } + + Api.GetPhysicalDeviceFeatures2(_physicalDevice, &features2); + + Capabilities = new HardwareCapabilities( + supportedExtensions.Contains(ExtConditionalRendering.ExtensionName), + supportedExtensions.Contains(ExtExtendedDynamicState.ExtensionName), + features2.Features.MultiViewport, + featuresRobustness2.NullDescriptor, + supportedExtensions.Contains(KhrPushDescriptor.ExtensionName), + supportsTransformFeedback, + propertiesTransformFeedback.TransformFeedbackQueries, + supportedFeatures.GeometryShader, + propertiesSubgroupSizeControl.MinSubgroupSize, + propertiesSubgroupSizeControl.MaxSubgroupSize, + propertiesSubgroupSizeControl.RequiredSubgroupSizeStages); + + ref var properties = ref properties2.Properties; + + MemoryAllocator 
/// <summary>
/// Initializes the renderer on top of an instance, device and queue that were supplied
/// through the constructor, rendering to an <see cref="ImageWindow"/> instead of a surface.
/// </summary>
/// <param name="logLevel">Debug level used to set up the debug report callback</param>
/// <exception cref="VulkanException">
/// The physical device reports no queue family at the index supplied through the constructor
/// </exception>
private unsafe void SetupOffScreenContext(GraphicsDebugLevel logLevel)
{
    var api = Vk.GetApi();

    Api = api;

    VulkanInitialization.CreateDebugCallbacks(api, logLevel, _instance, out var debugReport, out _debugReportCallback);

    DebugReportApi = debugReport;

    var supportedExtensions = VulkanInitialization.GetSupportedExtensions(api, _physicalDevice);

    uint propertiesCount;

    // First call only queries how many queue families exist, second call fills the array.
    api.GetPhysicalDeviceQueueFamilyProperties(_physicalDevice, &propertiesCount, null);

    QueueFamilyProperties[] queueFamilyProperties = new QueueFamilyProperties[propertiesCount];

    fixed (QueueFamilyProperties* pProperties = queueFamilyProperties)
    {
        api.GetPhysicalDeviceQueueFamilyProperties(_physicalDevice, &propertiesCount, pProperties);
    }

    if (_queueFamilyIndex >= (uint)queueFamilyProperties.Length)
    {
        throw new VulkanException($"Queue family index {_queueFamilyIndex} is out of range, the device reports {queueFamilyProperties.Length} queue families.");
    }

    // The queue count must come from the family the queue belongs to, not from family 0.
    // LoadFeatures uses it to decide whether a second (background) queue can be fetched
    // from that same family.
    // NOTE(review): this assumes the externally created device requested as many queues as the
    // family exposes; confirm against the code that creates the device.
    uint maxQueueCount = queueFamilyProperties[_queueFamilyIndex].QueueCount;

    LoadFeatures(supportedExtensions, maxQueueCount, _queueFamilyIndex);

    _window = new ImageWindow(this, _physicalDevice, _device);
}
default, info.FromCache); + } + else + { + return new ShaderCollection(this, _device, sources); + } + } + + internal ShaderCollection CreateProgramWithMinimalLayout(ShaderSource[] sources) + { + return new ShaderCollection(this, _device, sources, isMinimal: true); + } + + public ISampler CreateSampler(GAL.SamplerCreateInfo info) + { + return new SamplerHolder(this, _device, info); + } + + public ITexture CreateTexture(TextureCreateInfo info, float scale) + { + if (info.Target == Target.TextureBuffer) + { + return new TextureBuffer(this, info, scale); + } + + return CreateTextureView(info, scale); + } + + internal TextureView CreateTextureView(TextureCreateInfo info, float scale) + { + // This should be disposed when all views are destroyed. + using var storage = CreateTextureStorage(info, scale); + return storage.CreateView(info, 0, 0); + } + + internal TextureStorage CreateTextureStorage(TextureCreateInfo info, float scale) + { + return new TextureStorage(this, _physicalDevice, _device, info, scale); + } + + public void DeleteBuffer(BufferHandle buffer) + { + BufferManager.Delete(buffer); + } + + internal void FlushAllCommands() + { + // System.Console.WriteLine("flush commands " + caller); + _pipeline?.FlushCommandsImpl(); + } + + public ReadOnlySpan GetBufferData(BufferHandle buffer, int offset, int size) + { + return BufferManager.GetData(buffer, offset, size); + } + + public Capabilities GetCapabilities() + { + FormatFeatureFlags compressedFormatFeatureFlags = + FormatFeatureFlags.FormatFeatureSampledImageBit | + FormatFeatureFlags.FormatFeatureSampledImageFilterLinearBit | + FormatFeatureFlags.FormatFeatureBlitSrcBit | + FormatFeatureFlags.FormatFeatureTransferSrcBit | + FormatFeatureFlags.FormatFeatureTransferDstBit; + + bool supportsBc123CompressionFormat = FormatCapabilities.FormatsSupports(compressedFormatFeatureFlags, + GAL.Format.Bc1RgbaSrgb, + GAL.Format.Bc1RgbaUnorm, + GAL.Format.Bc2Srgb, + GAL.Format.Bc2Unorm, + GAL.Format.Bc3Srgb, + 
GAL.Format.Bc3Unorm); + + bool supportsBc45CompressionFormat = FormatCapabilities.FormatsSupports(compressedFormatFeatureFlags, + GAL.Format.Bc4Snorm, + GAL.Format.Bc4Unorm, + GAL.Format.Bc5Snorm, + GAL.Format.Bc5Unorm); + + bool supportsBc67CompressionFormat = FormatCapabilities.FormatsSupports(compressedFormatFeatureFlags, + GAL.Format.Bc6HSfloat, + GAL.Format.Bc6HUfloat, + GAL.Format.Bc7Srgb, + GAL.Format.Bc7Unorm); + + Api.GetPhysicalDeviceFeatures(_physicalDevice, out var features); + Api.GetPhysicalDeviceProperties(_physicalDevice, out var properties); + + var limits = properties.Limits; + + return new Capabilities( + api: TargetApi.Vulkan, + GpuVendor, + hasFrontFacingBug: IsIntelWindows, + hasVectorIndexingBug: Vendor == Vendor.Qualcomm, + supportsAstcCompression: features.TextureCompressionAstcLdr, + supportsBc123Compression: supportsBc123CompressionFormat, + supportsBc45Compression: supportsBc45CompressionFormat, + supportsBc67Compression: supportsBc67CompressionFormat, + supports3DTextureCompression: true, + supportsBgraFormat: true, + supportsR4G4Format: false, + supportsFragmentShaderInterlock: SupportsFragmentShaderInterlock, + supportsFragmentShaderOrderingIntel: false, + supportsGeometryShaderPassthrough: SupportsGeometryShaderPassthrough, + supportsImageLoadFormatted: features.ShaderStorageImageReadWithoutFormat, + supportsMismatchingViewFormat: true, + supportsNonConstantTextureOffset: false, + supportsShaderBallot: false, + supportsTextureShadowLod: false, + supportsViewportSwizzle: false, + supportsIndirectParameters: SupportsIndirectParameters, + maximumUniformBuffersPerStage: Constants.MaxUniformBuffersPerStage, + maximumStorageBuffersPerStage: Constants.MaxStorageBuffersPerStage, + maximumTexturesPerStage: Constants.MaxTexturesPerStage, + maximumImagesPerStage: Constants.MaxImagesPerStage, + maximumComputeSharedMemorySize: (int)limits.MaxComputeSharedMemorySize, + maximumSupportedAnisotropy: (int)limits.MaxSamplerAnisotropy, + 
storageBufferOffsetAlignment: (int)limits.MinStorageBufferOffsetAlignment); + } + + public HardwareInfo GetHardwareInfo() + { + return new HardwareInfo(GpuVendor, GpuRenderer); + } + + public static DeviceInfo[] GetPhysicalDevices() + { + return VulkanInitialization.GetSuitablePhysicalDevices(Vk.GetApi()); + } + + private static string ParseStandardVulkanVersion(uint version) + { + return $"{version >> 22}.{(version >> 12) & 0x3FF}.{version & 0xFFF}"; + } + + private static string ParseDriverVersion(ref PhysicalDeviceProperties properties) + { + uint driverVersionRaw = properties.DriverVersion; + + // NVIDIA differ from the standard here and uses a different format. + if (properties.VendorID == 0x10DE) + { + return $"{(driverVersionRaw >> 22) & 0x3FF}.{(driverVersionRaw >> 14) & 0xFF}.{(driverVersionRaw >> 6) & 0xFF}.{driverVersionRaw & 0x3F}"; + } + else + { + return ParseStandardVulkanVersion(driverVersionRaw); + } + } + + private unsafe void PrintGpuInformation() + { + Api.GetPhysicalDeviceProperties(_physicalDevice, out var properties); + + string vendorName = VendorUtils.GetNameFromId(properties.VendorID); + + Vendor = VendorUtils.FromId(properties.VendorID); + + IsAmdWindows = Vendor == Vendor.Amd && RuntimeInformation.IsOSPlatform(OSPlatform.Windows); + IsIntelWindows = Vendor == Vendor.Intel && RuntimeInformation.IsOSPlatform(OSPlatform.Windows); + + GpuVendor = vendorName; + GpuRenderer = Marshal.PtrToStringAnsi((IntPtr)properties.DeviceName); + GpuVersion = $"Vulkan v{ParseStandardVulkanVersion(properties.ApiVersion)}, Driver v{ParseDriverVersion(ref properties)}"; + + Logger.Notice.Print(LogClass.Gpu, $"{GpuVendor} {GpuRenderer} ({GpuVersion})"); + } + + public void Initialize(GraphicsDebugLevel logLevel) + { + if (IsOffScreen) + { + SetupOffScreenContext(logLevel); + } + else + { + SetupContext(logLevel); + } + + PrintGpuInformation(); + } + + public void PreFrame() + { + _syncManager.Cleanup(); + } + + public ICounterEvent ReportCounter(CounterType 
type, EventHandler resultHandler, bool hostReserved) + { + return _counters.QueueReport(type, resultHandler, hostReserved); + } + + public void ResetCounter(CounterType type) + { + _counters.QueueReset(type); + } + + public void SetBufferData(BufferHandle buffer, int offset, ReadOnlySpan data) + { + BufferManager.SetData(buffer, offset, data, _pipeline.CurrentCommandBuffer, _pipeline.EndRenderPass); + } + + public void UpdateCounters() + { + _counters.Update(); + } + + public void BackgroundContextAction(Action action, bool alwaysBackground = false) + { + action(); + } + + public void CreateSync(ulong id) + { + _syncManager.Create(id); + } + + public IProgram LoadProgramBinary(byte[] programBinary, bool isFragment, ShaderInfo info) + { + throw new NotImplementedException(); + } + + public void WaitSync(ulong id) + { + _syncManager.Wait(id); + } + + public void Screenshot() + { + _window.ScreenCaptureRequested = true; + } + + public void OnScreenCaptured(ScreenCaptureImageInfo bitmap) + { + ScreenCaptured?.Invoke(this, bitmap); + } + + public unsafe void Dispose() + { + CommandBufferPool.Dispose(); + BackgroundResources.Dispose(); + _counters.Dispose(); + _window.Dispose(); + HelperShader.Dispose(); + _pipeline.Dispose(); + BufferManager.Dispose(); + DescriptorSetManager.Dispose(); + PipelineLayoutCache.Dispose(); + + MemoryAllocator.Dispose(); + + if (_debugReportCallback.Handle != 0) + { + DebugReportApi.DestroyDebugReportCallback(_instance, _debugReportCallback, null); + } + + foreach (var shader in Shaders) + { + shader.Dispose(); + } + + foreach (var texture in Textures) + { + texture.Release(); + } + + foreach (var sampler in Samplers) + { + sampler.Dispose(); + } + + if (!IsOffScreen) + { + SurfaceApi.DestroySurface(_instance, _surface, null); + + Api.DestroyDevice(_device, null); + + // Last step destroy the instance + Api.DestroyInstance(_instance, null); + } + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/VulkanInitialization.cs 
b/Ryujinx.Graphics.Vulkan/VulkanInitialization.cs
new file mode 100644
index 000000000..8b71a8f6a
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/VulkanInitialization.cs
@@ -0,0 +1,590 @@
+using Ryujinx.Common.Configuration;
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.GAL;
+using Silk.NET.Vulkan;
+using Silk.NET.Vulkan.Extensions.EXT;
+using Silk.NET.Vulkan.Extensions.KHR;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    // Static helpers used to create the Vulkan instance, pick a physical device
+    // and create the logical device for the renderer.
+    public unsafe static class VulkanInitialization
+    {
+        // Sentinel returned by FindSuitableQueueFamily when no queue family matches.
+        private const uint InvalidIndex = uint.MaxValue;
+        // Reported to the driver as both application and engine name.
+        private const string AppName = "Ryujinx.Graphics.Vulkan";
+        // Upper bound on the number of queues requested from the selected family.
+        private const int QueuesCount = 2;
+
+        // Device extensions that are enabled when the device reports them, but are not mandatory.
+        public static string[] DesirableExtensions { get; } = new string[]
+        {
+            ExtConditionalRendering.ExtensionName,
+            ExtExtendedDynamicState.ExtensionName,
+            KhrDrawIndirectCount.ExtensionName,
+            KhrPushDescriptor.ExtensionName,
+            "VK_EXT_custom_border_color",
+            "VK_EXT_descriptor_indexing", // Enabling this works around an issue with disposed buffer bindings on RADV.
+            "VK_EXT_fragment_shader_interlock",
+            "VK_EXT_index_type_uint8",
+            "VK_EXT_robustness2",
+            "VK_EXT_shader_subgroup_ballot",
+            "VK_EXT_subgroup_size_control",
+            "VK_NV_geometry_shader_passthrough"
+        };
+
+        // Device extensions a GPU must expose to be considered suitable (see IsSuitableDevice).
+        public static string[] RequiredExtensions { get; } = new string[]
+        {
+            KhrSwapchain.ExtensionName,
+            "VK_EXT_shader_subgroup_vote",
+            ExtTransformFeedback.ExtensionName
+        };
+
+        // Debug report messages containing any of these substrings are dropped by DebugReport.
+        private static string[] _excludedMessages = new string[]
+        {
+            // NOTE: Done on purpose right now.
+            "UNASSIGNED-CoreValidation-Shader-OutputNotConsumed",
+            // TODO: Figure out if fixable
+            "VUID-vkCmdDrawIndexed-None-04584",
+            // TODO: Might be worth looking into making this happy to possibly optimize copies.
+            "UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout",
+            // TODO: Fix this, it's causing too much noise right now.
+            "VUID-VkSubpassDependency-srcSubpass-00867"
+        };
+
+        // Creates the Vulkan instance with the given instance extensions plus the debug report
+        // extension, enables the Khronos validation layer when logging is requested and the layer
+        // is installed, then registers the debug callbacks.
+        //
+        // api:                 Vulkan API entry points.
+        // logLevel:            Debug verbosity; None disables the validation layer and the callback.
+        // requiredExtensions:  Instance extensions requested by the caller.
+        // debugReport:         Receives the debug report extension API (default when logging is disabled).
+        // debugReportCallback: Receives the registered callback handle (default when logging is disabled).
+        //
+        // Throws when instance creation or callback registration reports an error.
+        internal static Instance CreateInstance(Vk api, GraphicsDebugLevel logLevel, string[] requiredExtensions, out ExtDebugReport debugReport, out DebugReportCallbackEXT debugReportCallback)
+        {
+            var enabledLayers = new List<string>();
+
+            // Adds layerName to enabledLayers if the loader reports it, otherwise logs a warning.
+            void AddAvailableLayer(string layerName)
+            {
+                uint layerPropertiesCount;
+
+                api.EnumerateInstanceLayerProperties(&layerPropertiesCount, null).ThrowOnError();
+
+                LayerProperties[] layerProperties = new LayerProperties[layerPropertiesCount];
+
+                fixed (LayerProperties* pLayerProperties = layerProperties)
+                {
+                    api.EnumerateInstanceLayerProperties(&layerPropertiesCount, layerProperties).ThrowOnError();
+
+                    for (int i = 0; i < layerPropertiesCount; i++)
+                    {
+                        string currentLayerName = Marshal.PtrToStringAnsi((IntPtr)pLayerProperties[i].LayerName);
+
+                        if (currentLayerName == layerName)
+                        {
+                            enabledLayers.Add(layerName);
+                            return;
+                        }
+                    }
+                }
+
+                Logger.Warning?.Print(LogClass.Gpu, $"Missing layer {layerName}");
+            }
+
+            if (logLevel != GraphicsDebugLevel.None)
+            {
+                AddAvailableLayer("VK_LAYER_KHRONOS_validation");
+            }
+
+            var enabledExtensions = requiredExtensions.Append(ExtDebugReport.ExtensionName).ToArray();
+
+            var appName = Marshal.StringToHGlobalAnsi(AppName);
+
+            var applicationInfo = new ApplicationInfo
+            {
+                PApplicationName = (byte*)appName,
+                ApplicationVersion = 1,
+                PEngineName = (byte*)appName,
+                EngineVersion = 1,
+                ApiVersion = Vk.Version12.Value
+            };
+
+            IntPtr* ppEnabledExtensions = stackalloc IntPtr[enabledExtensions.Length];
+            IntPtr* ppEnabledLayers = stackalloc IntPtr[enabledLayers.Count];
+
+            for (int i = 0; i < enabledExtensions.Length; i++)
+            {
+                ppEnabledExtensions[i] = Marshal.StringToHGlobalAnsi(enabledExtensions[i]);
+            }
+
+            for (int i = 0; i < enabledLayers.Count; i++)
+            {
+                ppEnabledLayers[i] = Marshal.StringToHGlobalAnsi(enabledLayers[i]);
+            }
+
+            var instanceCreateInfo = new InstanceCreateInfo
+            {
+                SType = StructureType.InstanceCreateInfo,
+                PApplicationInfo = &applicationInfo,
+                PpEnabledExtensionNames = (byte**)ppEnabledExtensions,
+                PpEnabledLayerNames = (byte**)ppEnabledLayers,
+                EnabledExtensionCount = (uint)enabledExtensions.Length,
+                EnabledLayerCount = (uint)enabledLayers.Count
+            };
+
+            Instance instance;
+
+            try
+            {
+                api.CreateInstance(in instanceCreateInfo, null, out instance).ThrowOnError();
+            }
+            finally
+            {
+                // The unmanaged strings are only needed for the duration of the call above;
+                // release them even when instance creation fails so they are not leaked.
+                Marshal.FreeHGlobal(appName);
+
+                for (int i = 0; i < enabledExtensions.Length; i++)
+                {
+                    Marshal.FreeHGlobal(ppEnabledExtensions[i]);
+                }
+
+                for (int i = 0; i < enabledLayers.Count; i++)
+                {
+                    Marshal.FreeHGlobal(ppEnabledLayers[i]);
+                }
+            }
+
+            CreateDebugCallbacks(api, logLevel, instance, out debugReport, out debugReportCallback);
+
+            return instance;
+        }
+
+        // Debug report callback handed to the driver by CreateDebugCallbacks.
+        // Drops messages matching _excludedMessages and forwards the rest to the logger at a
+        // level chosen from the report flags. Always returns 0.
+        private unsafe static uint DebugReport(
+            uint flags,
+            DebugReportObjectTypeEXT objectType,
+            ulong @object,
+            nuint location,
+            int messageCode,
+            byte* layerPrefix,
+            byte* message,
+            void* userData)
+        {
+            // PtrToStringAnsi returns null for a null pointer; an exception must not escape
+            // this native callback, so fall back to an empty message.
+            var msg = Marshal.PtrToStringAnsi((IntPtr)message) ?? string.Empty;
+
+            foreach (string excludedMessagePart in _excludedMessages)
+            {
+                if (msg.Contains(excludedMessagePart))
+                {
+                    return 0;
+                }
+            }
+
+            DebugReportFlagsEXT debugFlags = (DebugReportFlagsEXT)flags;
+
+            if (debugFlags.HasFlag(DebugReportFlagsEXT.DebugReportErrorBitExt))
+            {
+                Logger.Error?.Print(LogClass.Gpu, msg);
+                //throw new Exception(msg);
+            }
+            else if (debugFlags.HasFlag(DebugReportFlagsEXT.DebugReportWarningBitExt))
+            {
+                Logger.Warning?.Print(LogClass.Gpu, msg);
+            }
+            else if (debugFlags.HasFlag(DebugReportFlagsEXT.DebugReportInformationBitExt))
+            {
+                Logger.Info?.Print(LogClass.Gpu, msg);
+            }
+            else if (debugFlags.HasFlag(DebugReportFlagsEXT.DebugReportPerformanceWarningBitExt))
+            {
+                Logger.Warning?.Print(LogClass.Gpu, msg);
+            }
+            else
+            {
+                Logger.Debug?.Print(LogClass.Gpu, msg);
+            }
+
+            return 0;
+        }
+
+        internal static PhysicalDevice FindSuitablePhysicalDevice(Vk api, Instance instance, SurfaceKHR surface, string preferredGpuId)
+        {
+            uint physicalDeviceCount;
+
+            api.EnumeratePhysicalDevices(instance, 
&physicalDeviceCount, null).ThrowOnError(); + + PhysicalDevice[] physicalDevices = new PhysicalDevice[physicalDeviceCount]; + + fixed (PhysicalDevice* pPhysicalDevices = physicalDevices) + { + api.EnumeratePhysicalDevices(instance, &physicalDeviceCount, pPhysicalDevices).ThrowOnError(); + } + + // First we try to pick the the user preferred GPU. + for (int i = 0; i < physicalDevices.Length; i++) + { + if (IsPreferredAndSuitableDevice(api, physicalDevices[i], surface, preferredGpuId)) + { + return physicalDevices[i]; + } + } + + // If we fail to do that, just use the first compatible GPU. + for (int i = 0; i < physicalDevices.Length; i++) + { + if (IsSuitableDevice(api, physicalDevices[i], surface)) + { + return physicalDevices[i]; + } + } + + throw new VulkanException("Initialization failed, none of the available GPUs meets the minimum requirements."); + } + + internal static DeviceInfo[] GetSuitablePhysicalDevices(Vk api) + { + var appName = Marshal.StringToHGlobalAnsi(AppName); + + var applicationInfo = new ApplicationInfo + { + PApplicationName = (byte*)appName, + ApplicationVersion = 1, + PEngineName = (byte*)appName, + EngineVersion = 1, + ApiVersion = Vk.Version12.Value + }; + + var instanceCreateInfo = new InstanceCreateInfo + { + SType = StructureType.InstanceCreateInfo, + PApplicationInfo = &applicationInfo, + PpEnabledExtensionNames = null, + PpEnabledLayerNames = null, + EnabledExtensionCount = 0, + EnabledLayerCount = 0 + }; + + api.CreateInstance(in instanceCreateInfo, null, out var instance).ThrowOnError(); + + Marshal.FreeHGlobal(appName); + + uint physicalDeviceCount; + + api.EnumeratePhysicalDevices(instance, &physicalDeviceCount, null).ThrowOnError(); + + PhysicalDevice[] physicalDevices = new PhysicalDevice[physicalDeviceCount]; + + fixed (PhysicalDevice* pPhysicalDevices = physicalDevices) + { + api.EnumeratePhysicalDevices(instance, &physicalDeviceCount, pPhysicalDevices).ThrowOnError(); + } + + DeviceInfo[] devices = new 
DeviceInfo[physicalDevices.Length]; + + for (int i = 0; i < physicalDevices.Length; i++) + { + var physicalDevice = physicalDevices[i]; + api.GetPhysicalDeviceProperties(physicalDevice, out var properties); + + devices[i] = new DeviceInfo( + StringFromIdPair(properties.VendorID, properties.DeviceID), + VendorUtils.GetNameFromId(properties.VendorID), + Marshal.PtrToStringAnsi((IntPtr)properties.DeviceName), + properties.DeviceType == PhysicalDeviceType.DiscreteGpu); + } + + api.DestroyInstance(instance, null); + + return devices; + } + + public static string StringFromIdPair(uint vendorId, uint deviceId) + { + return $"0x{vendorId:X}_0x{deviceId:X}"; + } + + private static bool IsPreferredAndSuitableDevice(Vk api, PhysicalDevice physicalDevice, SurfaceKHR surface, string preferredGpuId) + { + api.GetPhysicalDeviceProperties(physicalDevice, out var properties); + + if (StringFromIdPair(properties.VendorID, properties.DeviceID) != preferredGpuId) + { + return false; + } + + return IsSuitableDevice(api, physicalDevice, surface); + } + + private static bool IsSuitableDevice(Vk api, PhysicalDevice physicalDevice, SurfaceKHR surface) + { + int extensionMatches = 0; + uint propertiesCount; + + api.EnumerateDeviceExtensionProperties(physicalDevice, (byte*)null, &propertiesCount, null).ThrowOnError(); + + ExtensionProperties[] extensionProperties = new ExtensionProperties[propertiesCount]; + + fixed (ExtensionProperties* pExtensionProperties = extensionProperties) + { + api.EnumerateDeviceExtensionProperties(physicalDevice, (byte*)null, &propertiesCount, pExtensionProperties).ThrowOnError(); + + for (int i = 0; i < propertiesCount; i++) + { + string extensionName = Marshal.PtrToStringAnsi((IntPtr)pExtensionProperties[i].ExtensionName); + + if (RequiredExtensions.Contains(extensionName)) + { + extensionMatches++; + } + } + } + + return extensionMatches == RequiredExtensions.Length && FindSuitableQueueFamily(api, physicalDevice, surface, out _) != InvalidIndex; + } + + 
internal static uint FindSuitableQueueFamily(Vk api, PhysicalDevice physicalDevice, SurfaceKHR surface, out uint queueCount) + { + const QueueFlags RequiredFlags = QueueFlags.QueueGraphicsBit | QueueFlags.QueueComputeBit; + + var khrSurface = new KhrSurface(api.Context); + + uint propertiesCount; + + api.GetPhysicalDeviceQueueFamilyProperties(physicalDevice, &propertiesCount, null); + + QueueFamilyProperties[] properties = new QueueFamilyProperties[propertiesCount]; + + fixed (QueueFamilyProperties* pProperties = properties) + { + api.GetPhysicalDeviceQueueFamilyProperties(physicalDevice, &propertiesCount, pProperties); + } + + for (uint index = 0; index < propertiesCount; index++) + { + var queueFlags = properties[index].QueueFlags; + + khrSurface.GetPhysicalDeviceSurfaceSupport(physicalDevice, index, surface, out var surfaceSupported).ThrowOnError(); + + if (queueFlags.HasFlag(RequiredFlags) && surfaceSupported) + { + queueCount = properties[index].QueueCount; + return index; + } + } + + queueCount = 0; + return InvalidIndex; + } + + public static Device CreateDevice(Vk api, PhysicalDevice physicalDevice, uint queueFamilyIndex, string[] supportedExtensions, uint queueCount) + { + if (queueCount > QueuesCount) + { + queueCount = QueuesCount; + } + + float* queuePriorities = stackalloc float[(int)queueCount]; + + for (int i = 0; i < queueCount; i++) + { + queuePriorities[i] = 1f; + } + + var queueCreateInfo = new DeviceQueueCreateInfo() + { + SType = StructureType.DeviceQueueCreateInfo, + QueueFamilyIndex = queueFamilyIndex, + QueueCount = queueCount, + PQueuePriorities = queuePriorities + }; + + api.GetPhysicalDeviceProperties(physicalDevice, out var properties); + bool useRobustBufferAccess = VendorUtils.FromId(properties.VendorID) == Vendor.Nvidia; + + var supportedFeatures = api.GetPhysicalDeviceFeature(physicalDevice); + + var features = new PhysicalDeviceFeatures() + { + DepthBiasClamp = true, + DepthClamp = true, + DualSrcBlend = true, + 
FragmentStoresAndAtomics = true, + GeometryShader = true, + ImageCubeArray = true, + IndependentBlend = true, + LogicOp = true, + MultiViewport = true, + PipelineStatisticsQuery = true, + SamplerAnisotropy = true, + ShaderClipDistance = true, + ShaderFloat64 = supportedFeatures.ShaderFloat64, + ShaderImageGatherExtended = true, + // ShaderStorageImageReadWithoutFormat = true, + // ShaderStorageImageWriteWithoutFormat = true, + TessellationShader = true, + VertexPipelineStoresAndAtomics = true, + RobustBufferAccess = useRobustBufferAccess + }; + + void* pExtendedFeatures = null; + + var featuresTransformFeedback = new PhysicalDeviceTransformFeedbackFeaturesEXT() + { + SType = StructureType.PhysicalDeviceTransformFeedbackFeaturesExt, + PNext = pExtendedFeatures, + TransformFeedback = true + }; + + pExtendedFeatures = &featuresTransformFeedback; + + var featuresRobustness2 = new PhysicalDeviceRobustness2FeaturesEXT() + { + SType = StructureType.PhysicalDeviceRobustness2FeaturesExt, + PNext = pExtendedFeatures, + NullDescriptor = true + }; + + pExtendedFeatures = &featuresRobustness2; + + var featuresExtendedDynamicState = new PhysicalDeviceExtendedDynamicStateFeaturesEXT() + { + SType = StructureType.PhysicalDeviceExtendedDynamicStateFeaturesExt, + PNext = pExtendedFeatures, + ExtendedDynamicState = supportedExtensions.Contains(ExtExtendedDynamicState.ExtensionName) + }; + + pExtendedFeatures = &featuresExtendedDynamicState; + + var featuresVk11 = new PhysicalDeviceVulkan11Features() + { + SType = StructureType.PhysicalDeviceVulkan11Features, + PNext = pExtendedFeatures, + ShaderDrawParameters = true + }; + + pExtendedFeatures = &featuresVk11; + + var featuresVk12 = new PhysicalDeviceVulkan12Features() + { + SType = StructureType.PhysicalDeviceVulkan12Features, + PNext = pExtendedFeatures, + DrawIndirectCount = supportedExtensions.Contains(KhrDrawIndirectCount.ExtensionName) + }; + + pExtendedFeatures = &featuresVk12; + + PhysicalDeviceIndexTypeUint8FeaturesEXT 
featuresIndexU8; + + if (supportedExtensions.Contains("VK_EXT_index_type_uint8")) + { + featuresIndexU8 = new PhysicalDeviceIndexTypeUint8FeaturesEXT() + { + SType = StructureType.PhysicalDeviceIndexTypeUint8FeaturesExt, + PNext = pExtendedFeatures, + IndexTypeUint8 = true + }; + + pExtendedFeatures = &featuresIndexU8; + } + + PhysicalDeviceFragmentShaderInterlockFeaturesEXT featuresFragmentShaderInterlock; + + if (supportedExtensions.Contains("VK_EXT_fragment_shader_interlock")) + { + featuresFragmentShaderInterlock = new PhysicalDeviceFragmentShaderInterlockFeaturesEXT() + { + SType = StructureType.PhysicalDeviceFragmentShaderInterlockFeaturesExt, + PNext = pExtendedFeatures, + FragmentShaderPixelInterlock = true + }; + + pExtendedFeatures = &featuresFragmentShaderInterlock; + } + + PhysicalDeviceSubgroupSizeControlFeaturesEXT featuresSubgroupSizeControl; + + if (supportedExtensions.Contains("VK_EXT_subgroup_size_control")) + { + featuresSubgroupSizeControl = new PhysicalDeviceSubgroupSizeControlFeaturesEXT() + { + SType = StructureType.PhysicalDeviceSubgroupSizeControlFeaturesExt, + PNext = pExtendedFeatures, + SubgroupSizeControl = true + }; + + pExtendedFeatures = &featuresSubgroupSizeControl; + } + + var enabledExtensions = RequiredExtensions.Union(DesirableExtensions.Intersect(supportedExtensions)).ToArray(); + + IntPtr* ppEnabledExtensions = stackalloc IntPtr[enabledExtensions.Length]; + + for (int i = 0; i < enabledExtensions.Length; i++) + { + ppEnabledExtensions[i] = Marshal.StringToHGlobalAnsi(enabledExtensions[i]); + } + + var deviceCreateInfo = new DeviceCreateInfo() + { + SType = StructureType.DeviceCreateInfo, + PNext = pExtendedFeatures, + QueueCreateInfoCount = 1, + PQueueCreateInfos = &queueCreateInfo, + PpEnabledExtensionNames = (byte**)ppEnabledExtensions, + EnabledExtensionCount = (uint)enabledExtensions.Length, + PEnabledFeatures = &features + }; + + api.CreateDevice(physicalDevice, in deviceCreateInfo, null, out var device).ThrowOnError(); + 
+            for (int i = 0; i < enabledExtensions.Length; i++)
+            {
+                Marshal.FreeHGlobal(ppEnabledExtensions[i]);
+            }
+
+            return device;
+        }
+
+        // Returns the names of all device extensions reported by the given physical device.
+        public static string[] GetSupportedExtensions(Vk api, PhysicalDevice physicalDevice)
+        {
+            uint propertiesCount;
+
+            // First call queries the count, second call fills the array.
+            api.EnumerateDeviceExtensionProperties(physicalDevice, (byte*)null, &propertiesCount, null).ThrowOnError();
+
+            ExtensionProperties[] extensionProperties = new ExtensionProperties[propertiesCount];
+
+            fixed (ExtensionProperties* pExtensionProperties = extensionProperties)
+            {
+                api.EnumerateDeviceExtensionProperties(physicalDevice, (byte*)null, &propertiesCount, pExtensionProperties).ThrowOnError();
+            }
+
+            return extensionProperties.Select(x => Marshal.PtrToStringAnsi((IntPtr)x.ExtensionName)).ToArray();
+        }
+
+        // Creates a command buffer pool for the given device, queue and queue family.
+        internal static CommandBufferPool CreateCommandBufferPool(Vk api, Device device, Queue queue, object queueLock, uint queueFamilyIndex)
+        {
+            return new CommandBufferPool(api, device, queue, queueLock, queueFamilyIndex);
+        }
+
+        // Registers DebugReport as the debug report callback for the instance.
+        //
+        // When logLevel is None nothing is registered and both out parameters are set to default.
+        // Otherwise the debug report extension is loaded and a callback is created whose flags
+        // depend on logLevel (errors only, errors plus performance warnings, or everything).
+        //
+        // Throws VulkanException when the debug report extension cannot be loaded,
+        // NotSupportedException for an unknown logLevel, and whatever ThrowOnError raises
+        // when callback creation fails.
+        internal unsafe static void CreateDebugCallbacks(Vk api, GraphicsDebugLevel logLevel, Instance instance, out ExtDebugReport debugReport, out DebugReportCallbackEXT debugReportCallback)
+        {
+            debugReport = default;
+
+            if (logLevel != GraphicsDebugLevel.None)
+            {
+                if (!api.TryGetInstanceExtension(instance, out debugReport))
+                {
+                    throw new VulkanException($"The {ExtDebugReport.ExtensionName} instance extension is required for graphics debug level {logLevel}, but it could not be loaded.");
+                }
+
+                var flags = logLevel switch
+                {
+                    GraphicsDebugLevel.Error => DebugReportFlagsEXT.DebugReportErrorBitExt,
+                    GraphicsDebugLevel.Slowdowns => DebugReportFlagsEXT.DebugReportErrorBitExt | DebugReportFlagsEXT.DebugReportPerformanceWarningBitExt,
+                    GraphicsDebugLevel.All => DebugReportFlagsEXT.DebugReportInformationBitExt |
+                                              DebugReportFlagsEXT.DebugReportWarningBitExt |
+                                              DebugReportFlagsEXT.DebugReportPerformanceWarningBitExt |
+                                              DebugReportFlagsEXT.DebugReportErrorBitExt |
+                                              DebugReportFlagsEXT.DebugReportDebugBitExt,
+                    _ => throw new NotSupportedException()
+                };
+                var debugReportCallbackCreateInfo = new DebugReportCallbackCreateInfoEXT()
+                {
+                    SType = StructureType.DebugReportCallbackCreateInfoExt,
+                    Flags = flags,
+                    PfnCallback = new PfnDebugReportCallbackEXT(DebugReport)
+                };
+
+                debugReport.CreateDebugReportCallback(instance, in debugReportCallbackCreateInfo, null, out debugReportCallback).ThrowOnError();
+            }
+            else
+            {
+                debugReportCallback = default;
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Vulkan/Window.cs b/Ryujinx.Graphics.Vulkan/Window.cs
new file mode 100644
index 000000000..88d556819
--- /dev/null
+++ b/Ryujinx.Graphics.Vulkan/Window.cs
@@ -0,0 +1,432 @@
+using Ryujinx.Graphics.GAL;
+using Silk.NET.Vulkan;
+using System;
+using System.Linq;
+using VkFormat = Silk.NET.Vulkan.Format;
+
+namespace Ryujinx.Graphics.Vulkan
+{
+    class Window : WindowBase, IDisposable
+    {
+        private const int SurfaceWidth = 1280;
+        private const int SurfaceHeight = 720;
+
+        private readonly VulkanGraphicsDevice _gd;
+        private readonly SurfaceKHR _surface;
+        private readonly PhysicalDevice _physicalDevice;
+        private readonly Device _device;
+        private SwapchainKHR _swapchain;
+
+        private Image[] _swapchainImages;
+        private Auto[] _swapchainImageViews;
+
+        private Semaphore _imageAvailableSemaphore;
+        private Semaphore _renderFinishedSemaphore;
+
+        private int _width;
+        private int _height;
+        private VkFormat _format;
+
+        public unsafe Window(VulkanGraphicsDevice gd, 
SurfaceKHR surface, PhysicalDevice physicalDevice, Device device)
+        {
+            _gd = gd;
+            _physicalDevice = physicalDevice;
+            _device = device;
+            _surface = surface;
+
+            CreateSwapchain();
+
+            var semaphoreCreateInfo = new SemaphoreCreateInfo()
+            {
+                SType = StructureType.SemaphoreCreateInfo
+            };
+
+            gd.Api.CreateSemaphore(device, semaphoreCreateInfo, null, out _imageAvailableSemaphore).ThrowOnError();
+            gd.Api.CreateSemaphore(device, semaphoreCreateInfo, null, out _renderFinishedSemaphore).ThrowOnError();
+        }
+
+        // Disposes the current swapchain image views, creates a replacement swapchain and
+        // destroys the one it replaces.
+        private unsafe void RecreateSwapchain()
+        {
+            // CreateSwapchain overwrites _swapchain (after passing it as OldSwapchain), so keep
+            // the previous handle; nothing else destroys it and it would leak on every recreation.
+            var oldSwapchain = _swapchain;
+
+            for (int i = 0; i < _swapchainImageViews.Length; i++)
+            {
+                _swapchainImageViews[i].Dispose();
+            }
+
+            CreateSwapchain();
+
+            if (oldSwapchain.Handle != 0)
+            {
+                // Work that references images of the retired swapchain must have completed
+                // before it is destroyed. Waiting for the device requires that no queue is
+                // used concurrently, hence the queue lock.
+                // NOTE(review): assumes QueueLock guards every queue of the device - confirm.
+                lock (_gd.QueueLock)
+                {
+                    _gd.Api.DeviceWaitIdle(_device);
+                }
+
+                _gd.SwapchainApi.DestroySwapchain(_device, oldSwapchain, null);
+            }
+        }
+
+        // Queries the surface capabilities, formats and present modes, then creates the
+        // swapchain and caches its size and format in _width, _height and _format.
+        private unsafe void CreateSwapchain()
+        {
+            _gd.SurfaceApi.GetPhysicalDeviceSurfaceCapabilities(_physicalDevice, _surface, out var capabilities);
+
+            uint surfaceFormatsCount;
+
+            _gd.SurfaceApi.GetPhysicalDeviceSurfaceFormats(_physicalDevice, _surface, &surfaceFormatsCount, null);
+
+            var surfaceFormats = new SurfaceFormatKHR[surfaceFormatsCount];
+
+            fixed (SurfaceFormatKHR* pSurfaceFormats = surfaceFormats)
+            {
+                _gd.SurfaceApi.GetPhysicalDeviceSurfaceFormats(_physicalDevice, _surface, &surfaceFormatsCount, pSurfaceFormats);
+            }
+
+            uint presentModesCount;
+
+            _gd.SurfaceApi.GetPhysicalDeviceSurfacePresentModes(_physicalDevice, _surface, &presentModesCount, null);
+
+            var presentModes = new PresentModeKHR[presentModesCount];
+
+            fixed (PresentModeKHR* pPresentModes = presentModes)
+            {
+                _gd.SurfaceApi.GetPhysicalDeviceSurfacePresentModes(_physicalDevice, _surface, &presentModesCount, pPresentModes);
+            }
+
+            // Ask for one image more than the minimum, clamped to the maximum when there is one
+            // (MaxImageCount == 0 is treated as "no limit" by the check below).
+            uint imageCount = capabilities.MinImageCount + 1;
+            if (capabilities.MaxImageCount > 0 && imageCount > capabilities.MaxImageCount)
+            {
+                imageCount = capabilities.MaxImageCount;
+            }
+
+            var surfaceFormat = ChooseSwapSurfaceFormat(surfaceFormats);
+
+            var extent = ChooseSwapExtent(capabilities);
+
+            _width = (int)extent.Width;
+            _height = (int)extent.Height;
+            _format = 
surfaceFormat.Format; + + var oldSwapchain = _swapchain; + + var swapchainCreateInfo = new SwapchainCreateInfoKHR() + { + SType = StructureType.SwapchainCreateInfoKhr, + Surface = _surface, + MinImageCount = imageCount, + ImageFormat = surfaceFormat.Format, + ImageColorSpace = surfaceFormat.ColorSpace, + ImageExtent = extent, + ImageUsage = ImageUsageFlags.ImageUsageColorAttachmentBit | ImageUsageFlags.ImageUsageTransferDstBit, + ImageSharingMode = SharingMode.Exclusive, + ImageArrayLayers = 1, + PreTransform = capabilities.CurrentTransform, + CompositeAlpha = CompositeAlphaFlagsKHR.CompositeAlphaOpaqueBitKhr, + PresentMode = ChooseSwapPresentMode(presentModes), + Clipped = true, + OldSwapchain = oldSwapchain + }; + + _gd.SwapchainApi.CreateSwapchain(_device, swapchainCreateInfo, null, out _swapchain).ThrowOnError(); + + _gd.SwapchainApi.GetSwapchainImages(_device, _swapchain, &imageCount, null); + + _swapchainImages = new Image[imageCount]; + + fixed (Image* pSwapchainImages = _swapchainImages) + { + _gd.SwapchainApi.GetSwapchainImages(_device, _swapchain, &imageCount, pSwapchainImages); + } + + _swapchainImageViews = new Auto[imageCount]; + + for (int i = 0; i < imageCount; i++) + { + _swapchainImageViews[i] = CreateSwapchainImageView(_swapchainImages[i], surfaceFormat.Format); + } + } + + private unsafe Auto CreateSwapchainImageView(Image swapchainImage, VkFormat format) + { + var componentMapping = new ComponentMapping( + ComponentSwizzle.R, + ComponentSwizzle.G, + ComponentSwizzle.B, + ComponentSwizzle.A); + + var aspectFlags = ImageAspectFlags.ImageAspectColorBit; + + var subresourceRange = new ImageSubresourceRange(aspectFlags, 0, 1, 0, 1); + + var imageCreateInfo = new ImageViewCreateInfo() + { + SType = StructureType.ImageViewCreateInfo, + Image = swapchainImage, + ViewType = ImageViewType.ImageViewType2D, + Format = format, + Components = componentMapping, + SubresourceRange = subresourceRange + }; + + _gd.Api.CreateImageView(_device, imageCreateInfo, 
null, out var imageView).ThrowOnError(); + return new Auto(new DisposableImageView(_gd.Api, _device, imageView)); + } + + private static SurfaceFormatKHR ChooseSwapSurfaceFormat(SurfaceFormatKHR[] availableFormats) + { + if (availableFormats.Length == 1 && availableFormats[0].Format == VkFormat.Undefined) + { + return new SurfaceFormatKHR(VkFormat.B8G8R8A8Unorm, ColorSpaceKHR.ColorspaceSrgbNonlinearKhr); + } + + foreach (var format in availableFormats) + { + if (format.Format == VkFormat.B8G8R8A8Unorm && format.ColorSpace == ColorSpaceKHR.ColorspaceSrgbNonlinearKhr) + { + return format; + } + } + + return availableFormats[0]; + } + + private static PresentModeKHR ChooseSwapPresentMode(PresentModeKHR[] availablePresentModes) + { + if (availablePresentModes.Contains(PresentModeKHR.PresentModeImmediateKhr)) + { + return PresentModeKHR.PresentModeImmediateKhr; + } + else if (availablePresentModes.Contains(PresentModeKHR.PresentModeMailboxKhr)) + { + return PresentModeKHR.PresentModeMailboxKhr; + } + else + { + return PresentModeKHR.PresentModeFifoKhr; + } + } + + public static Extent2D ChooseSwapExtent(SurfaceCapabilitiesKHR capabilities) + { + if (capabilities.CurrentExtent.Width != uint.MaxValue) + { + return capabilities.CurrentExtent; + } + else + { + uint width = Math.Max(capabilities.MinImageExtent.Width, Math.Min(capabilities.MaxImageExtent.Width, SurfaceWidth)); + uint height = Math.Max(capabilities.MinImageExtent.Height, Math.Min(capabilities.MaxImageExtent.Height, SurfaceHeight)); + + return new Extent2D(width, height); + } + } + + public unsafe override void Present(ITexture texture, ImageCrop crop, Action swapBuffersCallback) + { + uint nextImage = 0; + + while (true) + { + var acquireResult = _gd.SwapchainApi.AcquireNextImage( + _device, + _swapchain, + ulong.MaxValue, + _imageAvailableSemaphore, + new Fence(), + ref nextImage); + + if (acquireResult == Result.ErrorOutOfDateKhr || + acquireResult == Result.SuboptimalKhr) + { + RecreateSwapchain(); + } + 
else + { + acquireResult.ThrowOnError(); + break; + } + } + + var swapchainImage = _swapchainImages[nextImage]; + + _gd.FlushAllCommands(); + + var cbs = _gd.CommandBufferPool.Rent(); + + Transition( + cbs.CommandBuffer, + swapchainImage, + 0, + AccessFlags.AccessTransferWriteBit, + ImageLayout.Undefined, + ImageLayout.General); + + var view = (TextureView)texture; + + int srcX0, srcX1, srcY0, srcY1; + float scale = view.ScaleFactor; + + if (crop.Left == 0 && crop.Right == 0) + { + srcX0 = 0; + srcX1 = (int)(view.Width / scale); + } + else + { + srcX0 = crop.Left; + srcX1 = crop.Right; + } + + if (crop.Top == 0 && crop.Bottom == 0) + { + srcY0 = 0; + srcY1 = (int)(view.Height / scale); + } + else + { + srcY0 = crop.Top; + srcY1 = crop.Bottom; + } + + if (scale != 1f) + { + srcX0 = (int)(srcX0 * scale); + srcY0 = (int)(srcY0 * scale); + srcX1 = (int)Math.Ceiling(srcX1 * scale); + srcY1 = (int)Math.Ceiling(srcY1 * scale); + } + + if (ScreenCaptureRequested) + { + CaptureFrame(view, srcX0, srcY0, srcX1 - srcX0, srcY1 - srcY0, view.Info.Format.IsBgr(), crop.FlipX, crop.FlipY); + + ScreenCaptureRequested = false; + } + + float ratioX = crop.IsStretched ? 1.0f : MathF.Min(1.0f, _height * crop.AspectRatioX / (_width * crop.AspectRatioY)); + float ratioY = crop.IsStretched ? 1.0f : MathF.Min(1.0f, _width * crop.AspectRatioY / (_height * crop.AspectRatioX)); + + int dstWidth = (int)(_width * ratioX); + int dstHeight = (int)(_height * ratioY); + + int dstPaddingX = (_width - dstWidth) / 2; + int dstPaddingY = (_height - dstHeight) / 2; + + int dstX0 = crop.FlipX ? _width - dstPaddingX : dstPaddingX; + int dstX1 = crop.FlipX ? dstPaddingX : _width - dstPaddingX; + + int dstY0 = crop.FlipY ? dstPaddingY : _height - dstPaddingY; + int dstY1 = crop.FlipY ? 
_height - dstPaddingY : dstPaddingY; + + _gd.HelperShader.Blit( + _gd, + cbs, + view, + _swapchainImageViews[nextImage], + _width, + _height, + _format, + new Extents2D(srcX0, srcY0, srcX1, srcY1), + new Extents2D(dstX0, dstY1, dstX1, dstY0), + true, + true); + + Transition( + cbs.CommandBuffer, + swapchainImage, + 0, + 0, + ImageLayout.General, + ImageLayout.PresentSrcKhr); + + _gd.CommandBufferPool.Return( + cbs, + new[] { _imageAvailableSemaphore }, + new[] { PipelineStageFlags.PipelineStageColorAttachmentOutputBit }, + new[] { _renderFinishedSemaphore }); + + // TODO: Present queue. + var semaphore = _renderFinishedSemaphore; + var swapchain = _swapchain; + + Result result; + + var presentInfo = new PresentInfoKHR() + { + SType = StructureType.PresentInfoKhr, + WaitSemaphoreCount = 1, + PWaitSemaphores = &semaphore, + SwapchainCount = 1, + PSwapchains = &swapchain, + PImageIndices = &nextImage, + PResults = &result + }; + + lock (_gd.QueueLock) + { + _gd.SwapchainApi.QueuePresent(_gd.Queue, presentInfo); + } + } + + private unsafe void Transition( + CommandBuffer commandBuffer, + Image image, + AccessFlags srcAccess, + AccessFlags dstAccess, + ImageLayout srcLayout, + ImageLayout dstLayout) + { + var subresourceRange = new ImageSubresourceRange(ImageAspectFlags.ImageAspectColorBit, 0, 1, 0, 1); + + var barrier = new ImageMemoryBarrier() + { + SType = StructureType.ImageMemoryBarrier, + SrcAccessMask = srcAccess, + DstAccessMask = dstAccess, + OldLayout = srcLayout, + NewLayout = dstLayout, + SrcQueueFamilyIndex = Vk.QueueFamilyIgnored, + DstQueueFamilyIndex = Vk.QueueFamilyIgnored, + Image = image, + SubresourceRange = subresourceRange + }; + + _gd.Api.CmdPipelineBarrier( + commandBuffer, + PipelineStageFlags.PipelineStageTopOfPipeBit, + PipelineStageFlags.PipelineStageAllCommandsBit, + 0, + 0, + null, + 0, + null, + 1, + barrier); + } + + private void CaptureFrame(TextureView texture, int x, int y, int width, int height, bool isBgra, bool flipX, bool flipY) + 
{ + byte[] bitmap = texture.GetData(x, y, width, height); + + _gd.OnScreenCaptured(new ScreenCaptureImageInfo(width, height, isBgra, bitmap, flipX, flipY)); + } + + public override void SetSize(int width, int height) + { + // Not needed as we can get the size from the surface. + } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + unsafe + { + _gd.Api.DestroySemaphore(_device, _renderFinishedSemaphore, null); + _gd.Api.DestroySemaphore(_device, _imageAvailableSemaphore, null); + + for (int i = 0; i < _swapchainImageViews.Length; i++) + { + _swapchainImageViews[i].Dispose(); + } + + _gd.SwapchainApi.DestroySwapchain(_device, _swapchain, null); + + } + } + } + + public override void Dispose() + { + Dispose(true); + } + } +} diff --git a/Ryujinx.Graphics.Vulkan/WindowBase.cs b/Ryujinx.Graphics.Vulkan/WindowBase.cs new file mode 100644 index 000000000..4f1f0d165 --- /dev/null +++ b/Ryujinx.Graphics.Vulkan/WindowBase.cs @@ -0,0 +1,14 @@ +using Ryujinx.Graphics.GAL; +using System; + +namespace Ryujinx.Graphics.Vulkan +{ + internal abstract class WindowBase: IWindow + { + public bool ScreenCaptureRequested { get; set; } + + public abstract void Dispose(); + public abstract void Present(ITexture texture, ImageCrop crop, Action swapBuffersCallback); + public abstract void SetSize(int width, int height); + } +} \ No newline at end of file diff --git a/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs b/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs index 0caeacade..d01c3e3b2 100644 --- a/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs +++ b/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs @@ -735,11 +735,12 @@ namespace Ryujinx.HLE.HOS.Kernel.Process ulong argsPtr, ulong stackTop, int priority, - int cpuCore) + int cpuCore, + ThreadStart customThreadStart = null) { lock (_processLock) { - return thread.Initialize(entrypoint, argsPtr, stackTop, priority, cpuCore, this, ThreadType.User, null); + return thread.Initialize(entrypoint, argsPtr, stackTop, priority, cpuCore, 
this, ThreadType.User, customThreadStart); } } diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/Syscall.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/Syscall.cs index 179c7d4b2..189e4a3ea 100644 --- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/Syscall.cs +++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/Syscall.cs @@ -2350,6 +2350,18 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall [PointerSized] ulong stackTop, int priority, int cpuCore) + { + return CreateThread(out handle, entrypoint, argsPtr, stackTop, priority, cpuCore, null); + } + + public KernelResult CreateThread( + out int handle, + ulong entrypoint, + ulong argsPtr, + ulong stackTop, + int priority, + int cpuCore, + ThreadStart customThreadStart) { handle = 0; @@ -2386,7 +2398,8 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall argsPtr, stackTop, priority, - cpuCore); + cpuCore, + customThreadStart); if (result == KernelResult.Success) { diff --git a/Ryujinx.HLE/HOS/Services/Nv/Types/NvFence.cs b/Ryujinx.HLE/HOS/Services/Nv/Types/NvFence.cs index 664610a43..e57e5e082 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/Types/NvFence.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/Types/NvFence.cs @@ -32,7 +32,7 @@ namespace Ryujinx.HLE.HOS.Services.Nv.Types { if (IsValid()) { - return gpuContext.Synchronization.WaitOnSyncpoint(Id, Value, timeout); + // return gpuContext.Synchronization.WaitOnSyncpoint(Id, Value, timeout); } return false; diff --git a/Ryujinx.HLE/HOS/Services/ServerBase.cs b/Ryujinx.HLE/HOS/Services/ServerBase.cs index 907833441..c13ee1863 100644 --- a/Ryujinx.HLE/HOS/Services/ServerBase.cs +++ b/Ryujinx.HLE/HOS/Services/ServerBase.cs @@ -1,3 +1,4 @@ +using Ryujinx.Common.Logging; using Ryujinx.HLE.HOS.Ipc; using Ryujinx.HLE.HOS.Kernel; using Ryujinx.HLE.HOS.Kernel.Common; @@ -38,15 +39,15 @@ namespace Ryujinx.HLE.HOS.Services private readonly Dictionary> _ports = new Dictionary>(); public ManualResetEvent InitDone { get; } - public Func SmObjectFactory { get; } public string Name { get; } + public Func SmObjectFactory 
{ get; } public ServerBase(KernelContext context, string name, Func smObjectFactory = null) { InitDone = new ManualResetEvent(false); + _context = context; Name = name; SmObjectFactory = smObjectFactory; - _context = context; const ProcessCreationFlags flags = ProcessCreationFlags.EnableAslr | @@ -56,7 +57,7 @@ namespace Ryujinx.HLE.HOS.Services ProcessCreationInfo creationInfo = new ProcessCreationInfo("Service", 1, 0, 0x8000000, 1, flags, 0, 0); - KernelStatic.StartInitialProcess(context, creationInfo, DefaultCapabilities, 44, ServerLoop); + KernelStatic.StartInitialProcess(context, creationInfo, DefaultCapabilities, 44, Main); } private void AddPort(int serverPortHandle, Func objectFactory) @@ -80,6 +81,11 @@ namespace Ryujinx.HLE.HOS.Services _sessions.Add(serverSessionHandle, obj); } + private void Main() + { + ServerLoop(); + } + private void ServerLoop() { _selfProcess = KernelStatic.GetCurrentProcess(); diff --git a/Ryujinx.HLE/HOS/Services/Sockets/Bsd/Impl/EventFileDescriptor.cs b/Ryujinx.HLE/HOS/Services/Sockets/Bsd/Impl/EventFileDescriptor.cs index e92b42ef0..239e2434c 100644 --- a/Ryujinx.HLE/HOS/Services/Sockets/Bsd/Impl/EventFileDescriptor.cs +++ b/Ryujinx.HLE/HOS/Services/Sockets/Bsd/Impl/EventFileDescriptor.cs @@ -8,7 +8,6 @@ namespace Ryujinx.HLE.HOS.Services.Sockets.Bsd { private ulong _value; private readonly EventFdFlags _flags; - private AutoResetEvent _event; private object _lock = new object(); @@ -19,9 +18,13 @@ namespace Ryujinx.HLE.HOS.Services.Sockets.Bsd public EventFileDescriptor(ulong value, EventFdFlags flags) { + // FIXME: We should support blocking operations. + // Right now they can't be supported because it would cause the + // service to lock up as we only have one thread processing requests. 
+ flags |= EventFdFlags.NonBlocking; + _value = value; _flags = flags; - _event = new AutoResetEvent(false); WriteEvent = new ManualResetEvent(true); ReadEvent = new ManualResetEvent(true); @@ -31,7 +34,6 @@ namespace Ryujinx.HLE.HOS.Services.Sockets.Bsd public void Dispose() { - _event.Dispose(); WriteEvent.Dispose(); ReadEvent.Dispose(); } @@ -57,7 +59,7 @@ namespace Ryujinx.HLE.HOS.Services.Sockets.Bsd { while (_value == 0) { - _event.WaitOne(); + Monitor.Wait(_lock); } } else @@ -106,7 +108,7 @@ namespace Ryujinx.HLE.HOS.Services.Sockets.Bsd { if (Blocking) { - _event.WaitOne(); + Monitor.Wait(_lock); } else { @@ -119,7 +121,7 @@ namespace Ryujinx.HLE.HOS.Services.Sockets.Bsd writeSize = sizeof(ulong); _value += count; - _event.Set(); + Monitor.Pulse(_lock); WriteEvent.Set(); diff --git a/Ryujinx.Headless.SDL2/OpenGL/OpenGLWindow.cs b/Ryujinx.Headless.SDL2/OpenGL/OpenGLWindow.cs index a69951433..88dcbccfc 100644 --- a/Ryujinx.Headless.SDL2/OpenGL/OpenGLWindow.cs +++ b/Ryujinx.Headless.SDL2/OpenGL/OpenGLWindow.cs @@ -108,14 +108,9 @@ namespace Ryujinx.Headless.SDL2.OpenGL _glLogLevel = glLogLevel; } - protected override string GetGpuVendorName() - { - return ((Renderer)Renderer).GpuVendor; - } - public override SDL_WindowFlags GetWindowFlags() => SDL_WindowFlags.SDL_WINDOW_OPENGL; - protected override void InitializeRenderer() + protected override void InitializeWindowRenderer() { // Ensure to not share this context with other contexts before this point. SetupOpenGLAttributes(false, _glLogLevel); @@ -147,7 +142,9 @@ namespace Ryujinx.Headless.SDL2.OpenGL MouseDriver.SetClientSize(DefaultWidth, DefaultHeight); } - protected override void FinalizeRenderer() + protected override void InitializeRenderer() { } + + protected override void FinalizeWindowRenderer() { // Try to bind the OpenGL context before calling the gpu disposal. 
_openGLContext.MakeCurrent(); diff --git a/Ryujinx.Headless.SDL2/Options.cs b/Ryujinx.Headless.SDL2/Options.cs index 4c9b83c4d..956941798 100644 --- a/Ryujinx.Headless.SDL2/Options.cs +++ b/Ryujinx.Headless.SDL2/Options.cs @@ -94,6 +94,9 @@ namespace Ryujinx.Headless.SDL2 [Option("enable-shader-cache", Required = false, Default = true, HelpText = "Enables Shader cache.")] public bool? EnableShaderCache { get; set; } + [Option("enable-texture-recompression", Required = false, Default = false, HelpText = "Enables Texture recompression.")] + public bool? EnableTextureRecompression { get; set; } + [Option("enable-docked-mode", Required = false, Default = true, HelpText = "Enables Docked Mode.")] public bool? EnableDockedMode { get; set; } @@ -164,6 +167,12 @@ namespace Ryujinx.Headless.SDL2 [Option("graphics-shaders-dump-path", Required = false, HelpText = "Dumps shaders in this local directory. (Developer only)")] public string GraphicsShadersDumpPath { get; set; } + [Option("graphics-backend", Required = false, Default = GraphicsBackend.OpenGl, HelpText = "Change Graphics Backend to use.")] + public GraphicsBackend GraphicsBackend { get; set; } + + [Option("preferred-gpu-vendor", Required = false, Default = "", HelpText = "When using the Vulkan backend, prefer using the GPU from the specified vendor.")] + public string PreferredGpuVendor { get; set; } + // Hacks [Option("expand-ram", Required = false, Default = false, HelpText = "Expands the RAM amount on the emulated system from 4GB to 6GB.")] diff --git a/Ryujinx.Headless.SDL2/Program.cs b/Ryujinx.Headless.SDL2/Program.cs index 1d64a8c6c..deb34d051 100644 --- a/Ryujinx.Headless.SDL2/Program.cs +++ b/Ryujinx.Headless.SDL2/Program.cs @@ -17,7 +17,9 @@ using Ryujinx.Graphics.GAL.Multithreading; using Ryujinx.Graphics.Gpu; using Ryujinx.Graphics.Gpu.Shader; using Ryujinx.Graphics.OpenGL; +using Ryujinx.Graphics.Vulkan; using Ryujinx.Headless.SDL2.OpenGL; +using Ryujinx.Headless.SDL2.Vulkan; using Ryujinx.HLE; using 
Ryujinx.HLE.FileSystem; using Ryujinx.HLE.HOS; @@ -25,6 +27,7 @@ using Ryujinx.HLE.HOS.Services.Account.Acc; using Ryujinx.Input; using Ryujinx.Input.HLE; using Ryujinx.Input.SDL2; +using Silk.NET.Vulkan; using System; using System.Collections.Generic; using System.IO; @@ -404,6 +407,7 @@ namespace Ryujinx.Headless.SDL2 // Setup graphics configuration GraphicsConfig.EnableShaderCache = (bool)option.EnableShaderCache; + GraphicsConfig.EnableTextureRecompression = (bool)option.EnableTextureRecompression; GraphicsConfig.ResScale = option.ResScale; GraphicsConfig.MaxAnisotropy = option.MaxAnisotropy; GraphicsConfig.ShadersDumpPath = option.GraphicsShadersDumpPath; @@ -449,10 +453,49 @@ namespace Ryujinx.Headless.SDL2 Logger.Info?.Print(LogClass.Application, label); } - private static Switch InitializeEmulationContext(WindowBase window, Options options) + private static (WindowBase, IRenderer) CreateRenderer(Options options) { - IRenderer renderer = new Renderer(); + WindowBase window; + IRenderer renderer; + if (options.GraphicsBackend == GraphicsBackend.Vulkan) + { + VulkanWindow vulkanWindow = new VulkanWindow(_inputManager, options.LoggingGraphicsDebugLevel, options.AspectRatio, (bool)options.EnableMouse); + window = vulkanWindow; + + string preferredGpuId = string.Empty; + + if (!string.IsNullOrEmpty(options.PreferredGpuVendor)) + { + string preferredGpuVendor = options.PreferredGpuVendor.ToLowerInvariant(); + var devices = VulkanGraphicsDevice.GetPhysicalDevices(); + + foreach (var device in devices) + { + if (device.Vendor.ToLowerInvariant() == preferredGpuVendor) + { + preferredGpuId = device.Id; + break; + } + } + } + + renderer = new VulkanGraphicsDevice( + (instance, vk) => new SurfaceKHR((ulong)(vulkanWindow.CreateWindowSurface(instance.Handle))), + vulkanWindow.GetRequiredInstanceExtensions, + preferredGpuId); + } + else + { + window = new OpenGLWindow(_inputManager, options.LoggingGraphicsDebugLevel, options.AspectRatio, (bool)options.EnableMouse); + 
renderer = new Renderer(); + } + + return (window, renderer); + } + + private static Switch InitializeEmulationContext(WindowBase window, IRenderer renderer, Options options) + { BackendThreading threadingMode = options.BackendThreading; bool threadedGAL = threadingMode == BackendThreading.On || (threadingMode == BackendThreading.Auto && renderer.PreferThreading); @@ -521,8 +564,11 @@ namespace Ryujinx.Headless.SDL2 Logger.RestartTime(); - _window = new OpenGLWindow(_inputManager, options.LoggingGraphicsDebugLevel, options.AspectRatio, (bool)options.EnableMouse); - _emulationContext = InitializeEmulationContext(_window, options); + (WindowBase window, IRenderer renderer) = CreateRenderer(options); + + _window = window; + + _emulationContext = InitializeEmulationContext(window, renderer, options); SetupProgressHandler(); diff --git a/Ryujinx.Headless.SDL2/Ryujinx.Headless.SDL2.csproj b/Ryujinx.Headless.SDL2/Ryujinx.Headless.SDL2.csproj index 89e7e81a9..a896626a6 100644 --- a/Ryujinx.Headless.SDL2/Ryujinx.Headless.SDL2.csproj +++ b/Ryujinx.Headless.SDL2/Ryujinx.Headless.SDL2.csproj @@ -12,10 +12,12 @@ + + diff --git a/Ryujinx.Headless.SDL2/Vulkan/VulkanWindow.cs b/Ryujinx.Headless.SDL2/Vulkan/VulkanWindow.cs new file mode 100644 index 000000000..5287280c9 --- /dev/null +++ b/Ryujinx.Headless.SDL2/Vulkan/VulkanWindow.cs @@ -0,0 +1,91 @@ +using Ryujinx.Common.Configuration; +using Ryujinx.Common.Logging; +using Ryujinx.Input.HLE; +using System; +using System.Runtime.InteropServices; +using System.Text; +using static SDL2.SDL; + +namespace Ryujinx.Headless.SDL2.Vulkan +{ + class VulkanWindow : WindowBase + { + private GraphicsDebugLevel _glLogLevel; + + public VulkanWindow(InputManager inputManager, GraphicsDebugLevel glLogLevel, AspectRatio aspectRatio, bool enableMouse) : base(inputManager, glLogLevel, aspectRatio, enableMouse) + { + _glLogLevel = glLogLevel; + } + + public override SDL_WindowFlags GetWindowFlags() => SDL_WindowFlags.SDL_WINDOW_VULKAN; + + protected 
override void InitializeWindowRenderer() { } + + protected override void InitializeRenderer() + { + Renderer?.Window.SetSize(DefaultWidth, DefaultHeight); + MouseDriver.SetClientSize(DefaultWidth, DefaultHeight); + } + + public unsafe IntPtr CreateWindowSurface(IntPtr instance) + { + if (SDL_Vulkan_CreateSurface(WindowHandle, instance, out ulong surfaceHandle) == SDL_bool.SDL_FALSE) + { + string errorMessage = $"SDL_Vulkan_CreateSurface failed with error \"{SDL_GetError()}\""; + + Logger.Error?.Print(LogClass.Application, errorMessage); + + throw new Exception(errorMessage); + } + + return (IntPtr)surfaceHandle; + } + + private static unsafe string GetStringFromUtf8Byte(byte* start) + { + byte* end = start; + while (*end != 0) end++; + + return Encoding.UTF8.GetString(start, (int)(end - start)); + } + + // TODO: Fix this in SDL2-CS. + [DllImport("SDL2", EntryPoint = "SDL_Vulkan_GetInstanceExtensions", CallingConvention = CallingConvention.Cdecl)] + public static extern SDL_bool SDL_Vulkan_GetInstanceExtensions_Workaround(IntPtr window, out uint count, IntPtr names); + + public unsafe string[] GetRequiredInstanceExtensions() + { + if (SDL_Vulkan_GetInstanceExtensions_Workaround(WindowHandle, out uint extensionsCount, IntPtr.Zero) == SDL_bool.SDL_TRUE) + { + IntPtr[] rawExtensions = new IntPtr[(int)extensionsCount]; + string[] extensions = new string[(int)extensionsCount]; + + fixed (IntPtr* rawExtensionsPtr = rawExtensions) + { + if (SDL_Vulkan_GetInstanceExtensions_Workaround(WindowHandle, out extensionsCount, (IntPtr)rawExtensionsPtr) == SDL_bool.SDL_TRUE) + { + for (int i = 0; i < extensions.Length; i++) + { + extensions[i] = GetStringFromUtf8Byte((byte*)rawExtensions[i]); + } + + return extensions; + } + } + } + + string errorMessage = $"SDL_Vulkan_GetInstanceExtensions failed with error \"{SDL_GetError()}\""; + + Logger.Error?.Print(LogClass.Application, errorMessage); + + throw new Exception(errorMessage); + } + + protected override void 
FinalizeWindowRenderer() + { + Device.DisposeGpu(); + } + + protected override void SwapBuffers(object texture) { } + } +} diff --git a/Ryujinx.Headless.SDL2/WindowBase.cs b/Ryujinx.Headless.SDL2/WindowBase.cs index 58aa8d070..2f25a5850 100644 --- a/Ryujinx.Headless.SDL2/WindowBase.cs +++ b/Ryujinx.Headless.SDL2/WindowBase.cs @@ -151,22 +151,29 @@ namespace Ryujinx.Headless.SDL2 } } + protected abstract void InitializeWindowRenderer(); + protected abstract void InitializeRenderer(); - protected abstract void FinalizeRenderer(); + protected abstract void FinalizeWindowRenderer(); protected abstract void SwapBuffers(object image); - protected abstract string GetGpuVendorName(); - public abstract SDL_WindowFlags GetWindowFlags(); + private string GetGpuVendorName() + { + return Renderer.GetHardwareInfo().GpuVendor; + } + public void Render() { - InitializeRenderer(); + InitializeWindowRenderer(); Device.Gpu.Renderer.Initialize(_glLogLevel); + InitializeRenderer(); + _gpuVendorName = GetGpuVendorName(); Device.Gpu.Renderer.RunLoop(() => @@ -220,7 +227,7 @@ namespace Ryujinx.Headless.SDL2 } }); - FinalizeRenderer(); + FinalizeWindowRenderer(); } public void Exit() diff --git a/Ryujinx.Ui.Common/Configuration/ConfigurationFileFormat.cs b/Ryujinx.Ui.Common/Configuration/ConfigurationFileFormat.cs index 28a2956a6..8b5355b1c 100644 --- a/Ryujinx.Ui.Common/Configuration/ConfigurationFileFormat.cs +++ b/Ryujinx.Ui.Common/Configuration/ConfigurationFileFormat.cs @@ -80,7 +80,7 @@ namespace Ryujinx.Ui.Common.Configuration /// Enables printing error log messages /// public bool LoggingEnableError { get; set; } - + /// /// Enables printing trace log messages /// @@ -161,6 +161,11 @@ namespace Ryujinx.Ui.Common.Configuration /// public bool EnableShaderCache { get; set; } + /// + /// Enables or disables texture recompression + /// + public bool EnableTextureRecompression { get; set; } + /// /// Enables or disables profiled translation cache persistency /// @@ -310,6 +315,16 @@ 
namespace Ryujinx.Ui.Common.Configuration /// public List InputConfig { get; set; } + /// + /// Graphics backend + /// + public GraphicsBackend GraphicsBackend { get; set; } + + /// + /// Preferred GPU + /// + public string PreferredGpu { get; set; } + /// /// Loads a configuration file from disk /// diff --git a/Ryujinx.Ui.Common/Configuration/ConfigurationState.cs b/Ryujinx.Ui.Common/Configuration/ConfigurationState.cs index c739b2949..1ec8895d7 100644 --- a/Ryujinx.Ui.Common/Configuration/ConfigurationState.cs +++ b/Ryujinx.Ui.Common/Configuration/ConfigurationState.cs @@ -310,7 +310,7 @@ namespace Ryujinx.Ui.Common.Configuration EnableDockedMode = new ReactiveObject(); EnableDockedMode.Event += static (sender, e) => LogValueChange(sender, e, nameof(EnableDockedMode)); EnablePtc = new ReactiveObject(); - EnablePtc.Event += static (sender, e) => LogValueChange(sender, e, nameof(EnablePtc)); + EnablePtc.Event += static (sender, e) => LogValueChange(sender, e, nameof(EnablePtc)); EnableInternetAccess = new ReactiveObject(); EnableInternetAccess.Event += static (sender, e) => LogValueChange(sender, e, nameof(EnableInternetAccess)); EnableFsIntegrityChecks = new ReactiveObject(); @@ -411,23 +411,44 @@ namespace Ryujinx.Ui.Common.Configuration /// public ReactiveObject EnableShaderCache { get; private set; } + /// + /// Enables or disables texture recompression + /// + public ReactiveObject EnableTextureRecompression { get; private set; } + + /// + /// Graphics backend + /// + public ReactiveObject GraphicsBackend { get; private set; } + + /// + /// Preferred GPU + /// + public ReactiveObject PreferredGpu { get; private set; } + public GraphicsSection() { - BackendThreading = new ReactiveObject(); - BackendThreading.Event += static (sender, e) => LogValueChange(sender, e, nameof(BackendThreading)); - ResScale = new ReactiveObject(); - ResScale.Event += static (sender, e) => LogValueChange(sender, e, nameof(ResScale)); - ResScaleCustom = new ReactiveObject(); - 
ResScaleCustom.Event += static (sender, e) => LogValueChange(sender, e, nameof(ResScaleCustom)); - MaxAnisotropy = new ReactiveObject(); - MaxAnisotropy.Event += static (sender, e) => LogValueChange(sender, e, nameof(MaxAnisotropy)); - AspectRatio = new ReactiveObject(); - AspectRatio.Event += static (sender, e) => LogValueChange(sender, e, nameof(AspectRatio)); - ShadersDumpPath = new ReactiveObject(); - EnableVsync = new ReactiveObject(); - EnableVsync.Event += static (sender, e) => LogValueChange(sender, e, nameof(EnableVsync)); - EnableShaderCache = new ReactiveObject(); - EnableShaderCache.Event += static (sender, e) => LogValueChange(sender, e, nameof(EnableShaderCache)); + BackendThreading = new ReactiveObject(); + BackendThreading.Event += static (sender, e) => LogValueChange(sender, e, nameof(BackendThreading)); + ResScale = new ReactiveObject(); + ResScale.Event += static (sender, e) => LogValueChange(sender, e, nameof(ResScale)); + ResScaleCustom = new ReactiveObject(); + ResScaleCustom.Event += static (sender, e) => LogValueChange(sender, e, nameof(ResScaleCustom)); + MaxAnisotropy = new ReactiveObject(); + MaxAnisotropy.Event += static (sender, e) => LogValueChange(sender, e, nameof(MaxAnisotropy)); + AspectRatio = new ReactiveObject(); + AspectRatio.Event += static (sender, e) => LogValueChange(sender, e, nameof(AspectRatio)); + ShadersDumpPath = new ReactiveObject(); + EnableVsync = new ReactiveObject(); + EnableVsync.Event += static (sender, e) => LogValueChange(sender, e, nameof(EnableVsync)); + EnableShaderCache = new ReactiveObject(); + EnableShaderCache.Event += static (sender, e) => LogValueChange(sender, e, nameof(EnableShaderCache)); + EnableTextureRecompression = new ReactiveObject(); + EnableTextureRecompression.Event += static (sender, e) => LogValueChange(sender, e, nameof(EnableTextureRecompression)); + GraphicsBackend = new ReactiveObject(); + GraphicsBackend.Event += static (sender, e) => LogValueChange(sender, e, 
nameof(GraphicsBackend)); + PreferredGpu = new ReactiveObject(); + PreferredGpu.Event += static (sender, e) => LogValueChange(sender, e, nameof(PreferredGpu)); } } @@ -498,45 +519,46 @@ namespace Ryujinx.Ui.Common.Configuration { ConfigurationFileFormat configurationFile = new ConfigurationFileFormat { - Version = ConfigurationFileFormat.CurrentVersion, - BackendThreading = Graphics.BackendThreading, - EnableFileLog = Logger.EnableFileLog, - ResScale = Graphics.ResScale, - ResScaleCustom = Graphics.ResScaleCustom, - MaxAnisotropy = Graphics.MaxAnisotropy, - AspectRatio = Graphics.AspectRatio, - GraphicsShadersDumpPath = Graphics.ShadersDumpPath, - LoggingEnableDebug = Logger.EnableDebug, - LoggingEnableStub = Logger.EnableStub, - LoggingEnableInfo = Logger.EnableInfo, - LoggingEnableWarn = Logger.EnableWarn, - LoggingEnableError = Logger.EnableError, - LoggingEnableTrace = Logger.EnableTrace, - LoggingEnableGuest = Logger.EnableGuest, - LoggingEnableFsAccessLog = Logger.EnableFsAccessLog, - LoggingFilteredClasses = Logger.FilteredClasses, - LoggingGraphicsDebugLevel = Logger.GraphicsDebugLevel, - SystemLanguage = System.Language, - SystemRegion = System.Region, - SystemTimeZone = System.TimeZone, - SystemTimeOffset = System.SystemTimeOffset, - DockedMode = System.EnableDockedMode, - EnableDiscordIntegration = EnableDiscordIntegration, - CheckUpdatesOnStart = CheckUpdatesOnStart, - ShowConfirmExit = ShowConfirmExit, - HideCursorOnIdle = HideCursorOnIdle, - EnableVsync = Graphics.EnableVsync, - EnableShaderCache = Graphics.EnableShaderCache, - EnablePtc = System.EnablePtc, - EnableInternetAccess = System.EnableInternetAccess, - EnableFsIntegrityChecks = System.EnableFsIntegrityChecks, - FsGlobalAccessLogMode = System.FsGlobalAccessLogMode, - AudioBackend = System.AudioBackend, - AudioVolume = System.AudioVolume, - MemoryManagerMode = System.MemoryManagerMode, - ExpandRam = System.ExpandRam, - IgnoreMissingServices = System.IgnoreMissingServices, - GuiColumns = new 
GuiColumns + Version = ConfigurationFileFormat.CurrentVersion, + BackendThreading = Graphics.BackendThreading, + EnableFileLog = Logger.EnableFileLog, + ResScale = Graphics.ResScale, + ResScaleCustom = Graphics.ResScaleCustom, + MaxAnisotropy = Graphics.MaxAnisotropy, + AspectRatio = Graphics.AspectRatio, + GraphicsShadersDumpPath = Graphics.ShadersDumpPath, + LoggingEnableDebug = Logger.EnableDebug, + LoggingEnableStub = Logger.EnableStub, + LoggingEnableInfo = Logger.EnableInfo, + LoggingEnableWarn = Logger.EnableWarn, + LoggingEnableError = Logger.EnableError, + LoggingEnableTrace = Logger.EnableTrace, + LoggingEnableGuest = Logger.EnableGuest, + LoggingEnableFsAccessLog = Logger.EnableFsAccessLog, + LoggingFilteredClasses = Logger.FilteredClasses, + LoggingGraphicsDebugLevel = Logger.GraphicsDebugLevel, + SystemLanguage = System.Language, + SystemRegion = System.Region, + SystemTimeZone = System.TimeZone, + SystemTimeOffset = System.SystemTimeOffset, + DockedMode = System.EnableDockedMode, + EnableDiscordIntegration = EnableDiscordIntegration, + CheckUpdatesOnStart = CheckUpdatesOnStart, + ShowConfirmExit = ShowConfirmExit, + HideCursorOnIdle = HideCursorOnIdle, + EnableVsync = Graphics.EnableVsync, + EnableShaderCache = Graphics.EnableShaderCache, + EnableTextureRecompression = Graphics.EnableTextureRecompression, + EnablePtc = System.EnablePtc, + EnableInternetAccess = System.EnableInternetAccess, + EnableFsIntegrityChecks = System.EnableFsIntegrityChecks, + FsGlobalAccessLogMode = System.FsGlobalAccessLogMode, + AudioBackend = System.AudioBackend, + AudioVolume = System.AudioVolume, + MemoryManagerMode = System.MemoryManagerMode, + ExpandRam = System.ExpandRam, + IgnoreMissingServices = System.IgnoreMissingServices, + GuiColumns = new GuiColumns { FavColumn = Ui.GuiColumns.FavColumn, IconColumn = Ui.GuiColumns.IconColumn, @@ -547,31 +569,33 @@ namespace Ryujinx.Ui.Common.Configuration LastPlayedColumn = Ui.GuiColumns.LastPlayedColumn, FileExtColumn = 
Ui.GuiColumns.FileExtColumn, FileSizeColumn = Ui.GuiColumns.FileSizeColumn, - PathColumn = Ui.GuiColumns.PathColumn, + PathColumn = Ui.GuiColumns.PathColumn }, - ColumnSort = new ColumnSort + ColumnSort = new ColumnSort { SortColumnId = Ui.ColumnSort.SortColumnId, SortAscending = Ui.ColumnSort.SortAscending }, - GameDirs = Ui.GameDirs, - LanguageCode = Ui.LanguageCode, - EnableCustomTheme = Ui.EnableCustomTheme, - CustomThemePath = Ui.CustomThemePath, - BaseStyle = Ui.BaseStyle, - GameListViewMode = Ui.GameListViewMode, - ShowNames = Ui.ShowNames, - GridSize = Ui.GridSize, - ApplicationSort = Ui.ApplicationSort, - IsAscendingOrder = Ui.IsAscendingOrder, - StartFullscreen = Ui.StartFullscreen, - ShowConsole = Ui.ShowConsole, - EnableKeyboard = Hid.EnableKeyboard, - EnableMouse = Hid.EnableMouse, - Hotkeys = Hid.Hotkeys, - KeyboardConfig = new List(), - ControllerConfig = new List(), - InputConfig = Hid.InputConfig, + GameDirs = Ui.GameDirs, + LanguageCode = Ui.LanguageCode, + EnableCustomTheme = Ui.EnableCustomTheme, + CustomThemePath = Ui.CustomThemePath, + BaseStyle = Ui.BaseStyle, + GameListViewMode = Ui.GameListViewMode, + ShowNames = Ui.ShowNames, + GridSize = Ui.GridSize, + ApplicationSort = Ui.ApplicationSort, + IsAscendingOrder = Ui.IsAscendingOrder, + StartFullscreen = Ui.StartFullscreen, + ShowConsole = Ui.ShowConsole, + EnableKeyboard = Hid.EnableKeyboard, + EnableMouse = Hid.EnableMouse, + Hotkeys = Hid.Hotkeys, + KeyboardConfig = new List(), + ControllerConfig = new List(), + InputConfig = Hid.InputConfig, + GraphicsBackend = Graphics.GraphicsBackend, + PreferredGpu = Graphics.PreferredGpu }; return configurationFile; @@ -579,69 +603,72 @@ namespace Ryujinx.Ui.Common.Configuration public void LoadDefault() { - Logger.EnableFileLog.Value = true; - Graphics.BackendThreading.Value = BackendThreading.Auto; - Graphics.ResScale.Value = 1; - Graphics.ResScaleCustom.Value = 1.0f; - Graphics.MaxAnisotropy.Value = -1.0f; - Graphics.AspectRatio.Value = 
AspectRatio.Fixed16x9; - Graphics.ShadersDumpPath.Value = ""; - Logger.EnableDebug.Value = false; - Logger.EnableStub.Value = true; - Logger.EnableInfo.Value = true; - Logger.EnableWarn.Value = true; - Logger.EnableError.Value = true; - Logger.EnableTrace.Value = false; - Logger.EnableGuest.Value = true; - Logger.EnableFsAccessLog.Value = false; - Logger.FilteredClasses.Value = Array.Empty(); - Logger.GraphicsDebugLevel.Value = GraphicsDebugLevel.None; - System.Language.Value = Language.AmericanEnglish; - System.Region.Value = Region.USA; - System.TimeZone.Value = "UTC"; - System.SystemTimeOffset.Value = 0; - System.EnableDockedMode.Value = true; - EnableDiscordIntegration.Value = true; - CheckUpdatesOnStart.Value = true; - ShowConfirmExit.Value = true; - HideCursorOnIdle.Value = false; - Graphics.EnableVsync.Value = true; - Graphics.EnableShaderCache.Value = true; - System.EnablePtc.Value = true; - System.EnableInternetAccess.Value = false; - System.EnableFsIntegrityChecks.Value = true; - System.FsGlobalAccessLogMode.Value = 0; - System.AudioBackend.Value = AudioBackend.SDL2; - System.AudioVolume.Value = 1; - System.MemoryManagerMode.Value = MemoryManagerMode.HostMappedUnsafe; - System.ExpandRam.Value = false; - System.IgnoreMissingServices.Value = false; - Ui.GuiColumns.FavColumn.Value = true; - Ui.GuiColumns.IconColumn.Value = true; - Ui.GuiColumns.AppColumn.Value = true; - Ui.GuiColumns.DevColumn.Value = true; - Ui.GuiColumns.VersionColumn.Value = true; - Ui.GuiColumns.TimePlayedColumn.Value = true; - Ui.GuiColumns.LastPlayedColumn.Value = true; - Ui.GuiColumns.FileExtColumn.Value = true; - Ui.GuiColumns.FileSizeColumn.Value = true; - Ui.GuiColumns.PathColumn.Value = true; - Ui.ColumnSort.SortColumnId.Value = 0; - Ui.ColumnSort.SortAscending.Value = false; - Ui.GameDirs.Value = new List(); - Ui.EnableCustomTheme.Value = false; - Ui.LanguageCode.Value = "en_US"; - Ui.CustomThemePath.Value = ""; - Ui.BaseStyle.Value = "Dark"; - Ui.GameListViewMode.Value = 0; - 
Ui.ShowNames.Value = true; - Ui.GridSize.Value = 2; - Ui.ApplicationSort.Value = 0; - Ui.IsAscendingOrder.Value = true; - Ui.StartFullscreen.Value = false; - Ui.ShowConsole.Value = true; - Hid.EnableKeyboard.Value = false; - Hid.EnableMouse.Value = false; + Logger.EnableFileLog.Value = true; + Graphics.BackendThreading.Value = BackendThreading.Auto; + Graphics.ResScale.Value = 1; + Graphics.ResScaleCustom.Value = 1.0f; + Graphics.MaxAnisotropy.Value = -1.0f; + Graphics.AspectRatio.Value = AspectRatio.Fixed16x9; + Graphics.GraphicsBackend.Value = GraphicsBackend.OpenGl; + Graphics.PreferredGpu.Value = ""; + Graphics.ShadersDumpPath.Value = ""; + Logger.EnableDebug.Value = false; + Logger.EnableStub.Value = true; + Logger.EnableInfo.Value = true; + Logger.EnableWarn.Value = true; + Logger.EnableError.Value = true; + Logger.EnableTrace.Value = false; + Logger.EnableGuest.Value = true; + Logger.EnableFsAccessLog.Value = false; + Logger.FilteredClasses.Value = Array.Empty(); + Logger.GraphicsDebugLevel.Value = GraphicsDebugLevel.None; + System.Language.Value = Language.AmericanEnglish; + System.Region.Value = Region.USA; + System.TimeZone.Value = "UTC"; + System.SystemTimeOffset.Value = 0; + System.EnableDockedMode.Value = true; + EnableDiscordIntegration.Value = true; + CheckUpdatesOnStart.Value = true; + ShowConfirmExit.Value = true; + HideCursorOnIdle.Value = false; + Graphics.EnableVsync.Value = true; + Graphics.EnableShaderCache.Value = true; + Graphics.EnableTextureRecompression.Value = false; + System.EnablePtc.Value = true; + System.EnableInternetAccess.Value = false; + System.EnableFsIntegrityChecks.Value = true; + System.FsGlobalAccessLogMode.Value = 0; + System.AudioBackend.Value = AudioBackend.SDL2; + System.AudioVolume.Value = 1; + System.MemoryManagerMode.Value = MemoryManagerMode.HostMappedUnsafe; + System.ExpandRam.Value = false; + System.IgnoreMissingServices.Value = false; + Ui.GuiColumns.FavColumn.Value = true; + Ui.GuiColumns.IconColumn.Value = true; 
+ Ui.GuiColumns.AppColumn.Value = true; + Ui.GuiColumns.DevColumn.Value = true; + Ui.GuiColumns.VersionColumn.Value = true; + Ui.GuiColumns.TimePlayedColumn.Value = true; + Ui.GuiColumns.LastPlayedColumn.Value = true; + Ui.GuiColumns.FileExtColumn.Value = true; + Ui.GuiColumns.FileSizeColumn.Value = true; + Ui.GuiColumns.PathColumn.Value = true; + Ui.ColumnSort.SortColumnId.Value = 0; + Ui.ColumnSort.SortAscending.Value = false; + Ui.GameDirs.Value = new List(); + Ui.EnableCustomTheme.Value = false; + Ui.LanguageCode.Value = "en_US"; + Ui.CustomThemePath.Value = ""; + Ui.BaseStyle.Value = "Dark"; + Ui.GameListViewMode.Value = 0; + Ui.ShowNames.Value = true; + Ui.GridSize.Value = 2; + Ui.ApplicationSort.Value = 0; + Ui.IsAscendingOrder.Value = true; + Ui.StartFullscreen.Value = false; + Ui.ShowConsole.Value = true; + Hid.EnableKeyboard.Value = false; + Hid.EnableMouse.Value = false; Hid.Hotkeys.Value = new KeyboardHotkeys { ToggleVsync = Key.Tab, @@ -967,7 +994,7 @@ namespace Ryujinx.Ui.Common.Configuration if (configurationFileFormat.Version < 29) { Ryujinx.Common.Logging.Logger.Warning?.Print(LogClass.Application, $"Outdated configuration version {configurationFileFormat.Version}, migrating to version 29."); - + configurationFileFormat.Hotkeys = new KeyboardHotkeys { ToggleVsync = Key.Tab, @@ -1096,71 +1123,74 @@ namespace Ryujinx.Ui.Common.Configuration configurationFileUpdated = true; } - Logger.EnableFileLog.Value = configurationFileFormat.EnableFileLog; - Graphics.ResScale.Value = configurationFileFormat.ResScale; - Graphics.ResScaleCustom.Value = configurationFileFormat.ResScaleCustom; - Graphics.MaxAnisotropy.Value = configurationFileFormat.MaxAnisotropy; - Graphics.AspectRatio.Value = configurationFileFormat.AspectRatio; - Graphics.ShadersDumpPath.Value = configurationFileFormat.GraphicsShadersDumpPath; - Graphics.BackendThreading.Value = configurationFileFormat.BackendThreading; - Logger.EnableDebug.Value = configurationFileFormat.LoggingEnableDebug; - 
Logger.EnableStub.Value = configurationFileFormat.LoggingEnableStub; - Logger.EnableInfo.Value = configurationFileFormat.LoggingEnableInfo; - Logger.EnableWarn.Value = configurationFileFormat.LoggingEnableWarn; - Logger.EnableError.Value = configurationFileFormat.LoggingEnableError; - Logger.EnableTrace.Value = configurationFileFormat.LoggingEnableTrace; - Logger.EnableGuest.Value = configurationFileFormat.LoggingEnableGuest; - Logger.EnableFsAccessLog.Value = configurationFileFormat.LoggingEnableFsAccessLog; - Logger.FilteredClasses.Value = configurationFileFormat.LoggingFilteredClasses; - Logger.GraphicsDebugLevel.Value = configurationFileFormat.LoggingGraphicsDebugLevel; - System.Language.Value = configurationFileFormat.SystemLanguage; - System.Region.Value = configurationFileFormat.SystemRegion; - System.TimeZone.Value = configurationFileFormat.SystemTimeZone; - System.SystemTimeOffset.Value = configurationFileFormat.SystemTimeOffset; - System.EnableDockedMode.Value = configurationFileFormat.DockedMode; - EnableDiscordIntegration.Value = configurationFileFormat.EnableDiscordIntegration; - CheckUpdatesOnStart.Value = configurationFileFormat.CheckUpdatesOnStart; - ShowConfirmExit.Value = configurationFileFormat.ShowConfirmExit; - HideCursorOnIdle.Value = configurationFileFormat.HideCursorOnIdle; - Graphics.EnableVsync.Value = configurationFileFormat.EnableVsync; - Graphics.EnableShaderCache.Value = configurationFileFormat.EnableShaderCache; - System.EnablePtc.Value = configurationFileFormat.EnablePtc; - System.EnableInternetAccess.Value = configurationFileFormat.EnableInternetAccess; - System.EnableFsIntegrityChecks.Value = configurationFileFormat.EnableFsIntegrityChecks; - System.FsGlobalAccessLogMode.Value = configurationFileFormat.FsGlobalAccessLogMode; - System.AudioBackend.Value = configurationFileFormat.AudioBackend; - System.AudioVolume.Value = configurationFileFormat.AudioVolume; - System.MemoryManagerMode.Value = 
configurationFileFormat.MemoryManagerMode; - System.ExpandRam.Value = configurationFileFormat.ExpandRam; - System.IgnoreMissingServices.Value = configurationFileFormat.IgnoreMissingServices; - Ui.GuiColumns.FavColumn.Value = configurationFileFormat.GuiColumns.FavColumn; - Ui.GuiColumns.IconColumn.Value = configurationFileFormat.GuiColumns.IconColumn; - Ui.GuiColumns.AppColumn.Value = configurationFileFormat.GuiColumns.AppColumn; - Ui.GuiColumns.DevColumn.Value = configurationFileFormat.GuiColumns.DevColumn; - Ui.GuiColumns.VersionColumn.Value = configurationFileFormat.GuiColumns.VersionColumn; - Ui.GuiColumns.TimePlayedColumn.Value = configurationFileFormat.GuiColumns.TimePlayedColumn; - Ui.GuiColumns.LastPlayedColumn.Value = configurationFileFormat.GuiColumns.LastPlayedColumn; - Ui.GuiColumns.FileExtColumn.Value = configurationFileFormat.GuiColumns.FileExtColumn; - Ui.GuiColumns.FileSizeColumn.Value = configurationFileFormat.GuiColumns.FileSizeColumn; - Ui.GuiColumns.PathColumn.Value = configurationFileFormat.GuiColumns.PathColumn; - Ui.ColumnSort.SortColumnId.Value = configurationFileFormat.ColumnSort.SortColumnId; - Ui.ColumnSort.SortAscending.Value = configurationFileFormat.ColumnSort.SortAscending; - Ui.GameDirs.Value = configurationFileFormat.GameDirs; - Ui.EnableCustomTheme.Value = configurationFileFormat.EnableCustomTheme; - Ui.LanguageCode.Value = configurationFileFormat.LanguageCode; - Ui.CustomThemePath.Value = configurationFileFormat.CustomThemePath; - Ui.BaseStyle.Value = configurationFileFormat.BaseStyle; - Ui.GameListViewMode.Value = configurationFileFormat.GameListViewMode; - Ui.ShowNames.Value = configurationFileFormat.ShowNames; - Ui.IsAscendingOrder.Value = configurationFileFormat.IsAscendingOrder; - Ui.GridSize.Value = configurationFileFormat.GridSize; - Ui.ApplicationSort.Value = configurationFileFormat.ApplicationSort; - Ui.StartFullscreen.Value = configurationFileFormat.StartFullscreen; - Ui.ShowConsole.Value = 
configurationFileFormat.ShowConsole; - Hid.EnableKeyboard.Value = configurationFileFormat.EnableKeyboard; - Hid.EnableMouse.Value = configurationFileFormat.EnableMouse; - Hid.Hotkeys.Value = configurationFileFormat.Hotkeys; - Hid.InputConfig.Value = configurationFileFormat.InputConfig; + Logger.EnableFileLog.Value = configurationFileFormat.EnableFileLog; + Graphics.ResScale.Value = configurationFileFormat.ResScale; + Graphics.ResScaleCustom.Value = configurationFileFormat.ResScaleCustom; + Graphics.MaxAnisotropy.Value = configurationFileFormat.MaxAnisotropy; + Graphics.AspectRatio.Value = configurationFileFormat.AspectRatio; + Graphics.ShadersDumpPath.Value = configurationFileFormat.GraphicsShadersDumpPath; + Graphics.BackendThreading.Value = configurationFileFormat.BackendThreading; + Graphics.GraphicsBackend.Value = configurationFileFormat.GraphicsBackend; + Graphics.PreferredGpu.Value = configurationFileFormat.PreferredGpu; + Logger.EnableDebug.Value = configurationFileFormat.LoggingEnableDebug; + Logger.EnableStub.Value = configurationFileFormat.LoggingEnableStub; + Logger.EnableInfo.Value = configurationFileFormat.LoggingEnableInfo; + Logger.EnableWarn.Value = configurationFileFormat.LoggingEnableWarn; + Logger.EnableError.Value = configurationFileFormat.LoggingEnableError; + Logger.EnableTrace.Value = configurationFileFormat.LoggingEnableTrace; + Logger.EnableGuest.Value = configurationFileFormat.LoggingEnableGuest; + Logger.EnableFsAccessLog.Value = configurationFileFormat.LoggingEnableFsAccessLog; + Logger.FilteredClasses.Value = configurationFileFormat.LoggingFilteredClasses; + Logger.GraphicsDebugLevel.Value = configurationFileFormat.LoggingGraphicsDebugLevel; + System.Language.Value = configurationFileFormat.SystemLanguage; + System.Region.Value = configurationFileFormat.SystemRegion; + System.TimeZone.Value = configurationFileFormat.SystemTimeZone; + System.SystemTimeOffset.Value = configurationFileFormat.SystemTimeOffset; + 
System.EnableDockedMode.Value = configurationFileFormat.DockedMode; + EnableDiscordIntegration.Value = configurationFileFormat.EnableDiscordIntegration; + CheckUpdatesOnStart.Value = configurationFileFormat.CheckUpdatesOnStart; + ShowConfirmExit.Value = configurationFileFormat.ShowConfirmExit; + HideCursorOnIdle.Value = configurationFileFormat.HideCursorOnIdle; + Graphics.EnableVsync.Value = configurationFileFormat.EnableVsync; + Graphics.EnableShaderCache.Value = configurationFileFormat.EnableShaderCache; + Graphics.EnableTextureRecompression.Value = configurationFileFormat.EnableTextureRecompression; + System.EnablePtc.Value = configurationFileFormat.EnablePtc; + System.EnableInternetAccess.Value = configurationFileFormat.EnableInternetAccess; + System.EnableFsIntegrityChecks.Value = configurationFileFormat.EnableFsIntegrityChecks; + System.FsGlobalAccessLogMode.Value = configurationFileFormat.FsGlobalAccessLogMode; + System.AudioBackend.Value = configurationFileFormat.AudioBackend; + System.AudioVolume.Value = configurationFileFormat.AudioVolume; + System.MemoryManagerMode.Value = configurationFileFormat.MemoryManagerMode; + System.ExpandRam.Value = configurationFileFormat.ExpandRam; + System.IgnoreMissingServices.Value = configurationFileFormat.IgnoreMissingServices; + Ui.GuiColumns.FavColumn.Value = configurationFileFormat.GuiColumns.FavColumn; + Ui.GuiColumns.IconColumn.Value = configurationFileFormat.GuiColumns.IconColumn; + Ui.GuiColumns.AppColumn.Value = configurationFileFormat.GuiColumns.AppColumn; + Ui.GuiColumns.DevColumn.Value = configurationFileFormat.GuiColumns.DevColumn; + Ui.GuiColumns.VersionColumn.Value = configurationFileFormat.GuiColumns.VersionColumn; + Ui.GuiColumns.TimePlayedColumn.Value = configurationFileFormat.GuiColumns.TimePlayedColumn; + Ui.GuiColumns.LastPlayedColumn.Value = configurationFileFormat.GuiColumns.LastPlayedColumn; + Ui.GuiColumns.FileExtColumn.Value = configurationFileFormat.GuiColumns.FileExtColumn; + 
Ui.GuiColumns.FileSizeColumn.Value = configurationFileFormat.GuiColumns.FileSizeColumn; + Ui.GuiColumns.PathColumn.Value = configurationFileFormat.GuiColumns.PathColumn; + Ui.ColumnSort.SortColumnId.Value = configurationFileFormat.ColumnSort.SortColumnId; + Ui.ColumnSort.SortAscending.Value = configurationFileFormat.ColumnSort.SortAscending; + Ui.GameDirs.Value = configurationFileFormat.GameDirs; + Ui.EnableCustomTheme.Value = configurationFileFormat.EnableCustomTheme; + Ui.LanguageCode.Value = configurationFileFormat.LanguageCode; + Ui.CustomThemePath.Value = configurationFileFormat.CustomThemePath; + Ui.BaseStyle.Value = configurationFileFormat.BaseStyle; + Ui.GameListViewMode.Value = configurationFileFormat.GameListViewMode; + Ui.ShowNames.Value = configurationFileFormat.ShowNames; + Ui.IsAscendingOrder.Value = configurationFileFormat.IsAscendingOrder; + Ui.GridSize.Value = configurationFileFormat.GridSize; + Ui.ApplicationSort.Value = configurationFileFormat.ApplicationSort; + Ui.StartFullscreen.Value = configurationFileFormat.StartFullscreen; + Ui.ShowConsole.Value = configurationFileFormat.ShowConsole; + Hid.EnableKeyboard.Value = configurationFileFormat.EnableKeyboard; + Hid.EnableMouse.Value = configurationFileFormat.EnableMouse; + Hid.Hotkeys.Value = configurationFileFormat.Hotkeys; + Hid.InputConfig.Value = configurationFileFormat.InputConfig; if (Hid.InputConfig.Value == null) { diff --git a/Ryujinx.sln b/Ryujinx.sln index 92d4e5b4f..1332e129b 100644 --- a/Ryujinx.sln +++ b/Ryujinx.sln @@ -74,6 +74,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Ui.Common", "Ryujin EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ryujinx.Horizon.Generators", "Ryujinx.Horizon.Generators\Ryujinx.Horizon.Generators.csproj", "{6AE2A5E8-4C5A-48B9-997B-E1455C0355C6}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Vulkan", "Ryujinx.Graphics.Vulkan\Ryujinx.Graphics.Vulkan.csproj", 
"{D4D09B08-D580-4D69-B886-C35D2853F6C8}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Spv.Generator", "Spv.Generator\Spv.Generator.csproj", "{2BCB3D7A-38C0-4FE7-8FDA-374C6AD56D0E}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -212,6 +216,14 @@ Global {6AE2A5E8-4C5A-48B9-997B-E1455C0355C6}.Debug|Any CPU.Build.0 = Debug|Any CPU {6AE2A5E8-4C5A-48B9-997B-E1455C0355C6}.Release|Any CPU.ActiveCfg = Release|Any CPU {6AE2A5E8-4C5A-48B9-997B-E1455C0355C6}.Release|Any CPU.Build.0 = Release|Any CPU + {D4D09B08-D580-4D69-B886-C35D2853F6C8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D4D09B08-D580-4D69-B886-C35D2853F6C8}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D4D09B08-D580-4D69-B886-C35D2853F6C8}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D4D09B08-D580-4D69-B886-C35D2853F6C8}.Release|Any CPU.Build.0 = Release|Any CPU + {2BCB3D7A-38C0-4FE7-8FDA-374C6AD56D0E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {2BCB3D7A-38C0-4FE7-8FDA-374C6AD56D0E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {2BCB3D7A-38C0-4FE7-8FDA-374C6AD56D0E}.Release|Any CPU.ActiveCfg = Release|Any CPU + {2BCB3D7A-38C0-4FE7-8FDA-374C6AD56D0E}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/Ryujinx/Ryujinx.csproj b/Ryujinx/Ryujinx.csproj index fc8e33fee..8e8eb28c4 100644 --- a/Ryujinx/Ryujinx.csproj +++ b/Ryujinx/Ryujinx.csproj @@ -21,6 +21,7 @@ + @@ -39,6 +40,7 @@ + diff --git a/Ryujinx/Ui/GLRenderer.cs b/Ryujinx/Ui/GLRenderer.cs index 45eb53cb8..f41ce38c1 100644 --- a/Ryujinx/Ui/GLRenderer.cs +++ b/Ryujinx/Ui/GLRenderer.cs @@ -127,9 +127,9 @@ namespace Ryujinx.Ui _nativeWindow.SwapBuffers(); } - public override string GetGpuVendorName() + protected override string GetGpuBackendName() { - return ((Renderer)Renderer).GpuVendor; + return "OpenGL"; } protected override void Dispose(bool disposing) diff --git a/Ryujinx/Ui/MainWindow.cs 
b/Ryujinx/Ui/MainWindow.cs index afd37d3f0..e8e200db7 100644 --- a/Ryujinx/Ui/MainWindow.cs +++ b/Ryujinx/Ui/MainWindow.cs @@ -19,6 +19,7 @@ using Ryujinx.Common.System; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.GAL.Multithreading; using Ryujinx.Graphics.OpenGL; +using Ryujinx.Graphics.Vulkan; using Ryujinx.HLE.FileSystem; using Ryujinx.HLE.HOS; using Ryujinx.HLE.HOS.Services.Account.Acc; @@ -36,6 +37,8 @@ using Ryujinx.Ui.Common.Helper; using Ryujinx.Ui.Helper; using Ryujinx.Ui.Widgets; using Ryujinx.Ui.Windows; +using Silk.NET.Vulkan; +using SPB.Graphics.Vulkan; using System; using System.Diagnostics; using System.IO; @@ -81,8 +84,6 @@ namespace Ryujinx.Ui public bool IsFocused; - private static bool UseVulkan = false; - #pragma warning disable CS0169, CS0649, IDE0044 [GUI] public MenuItem ExitMenuItem; @@ -120,6 +121,7 @@ namespace Ryujinx.Ui [GUI] CheckMenuItem _fileExtToggle; [GUI] CheckMenuItem _pathToggle; [GUI] CheckMenuItem _fileSizeToggle; + [GUI] Label _gpuBackend; [GUI] Label _dockedMode; [GUI] Label _aspectRatio; [GUI] Label _gameStatus; @@ -406,9 +408,10 @@ namespace Ryujinx.Ui IRenderer renderer; - if (UseVulkan) + if (ConfigurationState.Instance.Graphics.GraphicsBackend == GraphicsBackend.Vulkan) { - throw new NotImplementedException(); + string preferredGpu = ConfigurationState.Instance.Graphics.PreferredGpu.Value; + renderer = new VulkanGraphicsDevice(CreateVulkanSurface, VulkanHelper.GetRequiredInstanceExtensions, preferredGpu); } else { @@ -579,6 +582,11 @@ namespace Ryujinx.Ui _emulationContext = new HLE.Switch(configuration); } + private SurfaceKHR CreateVulkanSurface(Instance instance, Vk vk) + { + return new SurfaceKHR((ulong)((VKRenderer)RendererWidget).CreateWindowSurface(instance.Handle)); + } + private void SetupProgressUiHandlers() { Ptc.PtcStateChanged -= ProgressHandler; @@ -875,7 +883,7 @@ namespace Ryujinx.Ui private RendererWidgetBase CreateRendererWidget() { - if (UseVulkan) + if 
(ConfigurationState.Instance.Graphics.GraphicsBackend == GraphicsBackend.Vulkan) { return new VKRenderer(InputManager, ConfigurationState.Instance.Logger.GraphicsDebugLevel); } @@ -946,8 +954,8 @@ namespace Ryujinx.Ui UpdateColumns(); UpdateGameTable(); - Task.Run(RefreshFirmwareLabel); - Task.Run(HandleRelaunch); + RefreshFirmwareLabel(); + HandleRelaunch(); _actionMenu.Sensitive = false; _firmwareInstallFile.Sensitive = true; @@ -1031,10 +1039,11 @@ namespace Ryujinx.Ui int resScale = ConfigurationState.Instance.Graphics.ResScale; float resScaleCustom = ConfigurationState.Instance.Graphics.ResScaleCustom; - Graphics.Gpu.GraphicsConfig.ResScale = (resScale == -1) ? resScaleCustom : resScale; - Graphics.Gpu.GraphicsConfig.MaxAnisotropy = ConfigurationState.Instance.Graphics.MaxAnisotropy; - Graphics.Gpu.GraphicsConfig.ShadersDumpPath = ConfigurationState.Instance.Graphics.ShadersDumpPath; - Graphics.Gpu.GraphicsConfig.EnableShaderCache = ConfigurationState.Instance.Graphics.EnableShaderCache; + Graphics.Gpu.GraphicsConfig.ResScale = (resScale == -1) ? 
resScaleCustom : resScale; + Graphics.Gpu.GraphicsConfig.MaxAnisotropy = ConfigurationState.Instance.Graphics.MaxAnisotropy; + Graphics.Gpu.GraphicsConfig.ShadersDumpPath = ConfigurationState.Instance.Graphics.ShadersDumpPath; + Graphics.Gpu.GraphicsConfig.EnableShaderCache = ConfigurationState.Instance.Graphics.EnableShaderCache; + Graphics.Gpu.GraphicsConfig.EnableTextureRecompression = ConfigurationState.Instance.Graphics.EnableTextureRecompression; } public void SaveConfig() @@ -1125,6 +1134,7 @@ namespace Ryujinx.Ui _gpuName.Text = args.GpuName; _dockedMode.Text = args.DockedMode; _aspectRatio.Text = args.AspectRatio; + _gpuBackend.Text = args.GpuBackend; _volumeStatus.Text = GetVolumeLabelText(args.Volume); if (args.VSyncEnabled) diff --git a/Ryujinx/Ui/MainWindow.glade b/Ryujinx/Ui/MainWindow.glade index b7d93e9bd..3c318a76d 100644 --- a/Ryujinx/Ui/MainWindow.glade +++ b/Ryujinx/Ui/MainWindow.glade @@ -774,6 +774,31 @@ 11 + + + True + False + start + 5 + 5 + + + False + True + 12 + + + + + True + False + + + False + True + 13 + + True @@ -785,7 +810,7 @@ True True - 12 + 14 diff --git a/Ryujinx/Ui/RendererWidgetBase.cs b/Ryujinx/Ui/RendererWidgetBase.cs index 0d82a5c57..1e6099b4f 100644 --- a/Ryujinx/Ui/RendererWidgetBase.cs +++ b/Ryujinx/Ui/RendererWidgetBase.cs @@ -72,6 +72,7 @@ namespace Ryujinx.Ui private InputManager _inputManager; private IKeyboard _keyboardInterface; private GraphicsDebugLevel _glLogLevel; + private string _gpuBackendName; private string _gpuVendorName; private bool _isMouseInClient; @@ -118,7 +119,12 @@ namespace Ryujinx.Ui public abstract void SwapBuffers(object image); - public abstract string GetGpuVendorName(); + protected abstract string GetGpuBackendName(); + + private string GetGpuVendorName() + { + return Renderer.GetHardwareInfo().GpuVendor; + } private void HideCursorStateChanged(object sender, ReactiveEventArgs state) { @@ -225,7 +231,7 @@ namespace Ryujinx.Ui WindowWidth = evnt.Width * monitor.ScaleFactor; WindowHeight = 
evnt.Height * monitor.ScaleFactor; - Renderer?.Window.SetSize(WindowWidth, WindowHeight); + Renderer?.Window?.SetSize(WindowWidth, WindowHeight); return result; } @@ -306,7 +312,7 @@ namespace Ryujinx.Ui } Renderer = renderer; - Renderer?.Window.SetSize(WindowWidth, WindowHeight); + Renderer?.Window?.SetSize(WindowWidth, WindowHeight); if (Renderer != null) { @@ -385,6 +391,7 @@ namespace Ryujinx.Ui Device.Gpu.Renderer.Initialize(_glLogLevel); + _gpuBackendName = GetGpuBackendName(); _gpuVendorName = GetGpuVendorName(); Device.Gpu.Renderer.RunLoop(() => @@ -430,6 +437,7 @@ namespace Ryujinx.Ui StatusUpdatedEvent?.Invoke(this, new StatusUpdatedEventArgs( Device.EnableDeviceVsync, Device.GetVolume(), + _gpuBackendName, dockedMode, ConfigurationState.Instance.Graphics.AspectRatio.Value.ToText(), $"Game: {Device.Statistics.GetGameFrameRate():00.00} FPS ({Device.Statistics.GetGameFrameTime():00.00} ms)", diff --git a/Ryujinx/Ui/StatusUpdatedEventArgs.cs b/Ryujinx/Ui/StatusUpdatedEventArgs.cs index df83efa41..046597b07 100644 --- a/Ryujinx/Ui/StatusUpdatedEventArgs.cs +++ b/Ryujinx/Ui/StatusUpdatedEventArgs.cs @@ -11,11 +11,13 @@ namespace Ryujinx.Ui public string GameStatus; public string FifoStatus; public string GpuName; + public string GpuBackend; - public StatusUpdatedEventArgs(bool vSyncEnabled, float volume, string dockedMode, string aspectRatio, string gameStatus, string fifoStatus, string gpuName) + public StatusUpdatedEventArgs(bool vSyncEnabled, float volume, string gpuBackend, string dockedMode, string aspectRatio, string gameStatus, string fifoStatus, string gpuName) { VSyncEnabled = vSyncEnabled; Volume = volume; + GpuBackend = gpuBackend; DockedMode = dockedMode; AspectRatio = aspectRatio; GameStatus = gameStatus; diff --git a/Ryujinx/Ui/VKRenderer.cs b/Ryujinx/Ui/VKRenderer.cs index f6dd9ec4f..d2f60de84 100644 --- a/Ryujinx/Ui/VKRenderer.cs +++ b/Ryujinx/Ui/VKRenderer.cs @@ -66,9 +66,9 @@ namespace Ryujinx.Ui public override void SwapBuffers(object image) 
{ } - public override string GetGpuVendorName() + protected override string GetGpuBackendName() { - return "Vulkan (Unknown)"; + return "Vulkan"; } protected override void Dispose(bool disposing) diff --git a/Ryujinx/Ui/Windows/SettingsWindow.cs b/Ryujinx/Ui/Windows/SettingsWindow.cs index fef1164b3..c339ed124 100644 --- a/Ryujinx/Ui/Windows/SettingsWindow.cs +++ b/Ryujinx/Ui/Windows/SettingsWindow.cs @@ -6,6 +6,7 @@ using Ryujinx.Audio.Backends.SoundIo; using Ryujinx.Common.Configuration; using Ryujinx.Common.Configuration.Hid; using Ryujinx.Common.GraphicsDriver; +using Ryujinx.Graphics.Vulkan; using Ryujinx.Ui.Common.Configuration; using Ryujinx.HLE.FileSystem; using Ryujinx.HLE.HOS.Services.Time.TimeZone; @@ -53,6 +54,7 @@ namespace Ryujinx.Ui.Windows [GUI] CheckButton _hideCursorOnIdleToggle; [GUI] CheckButton _vSyncToggle; [GUI] CheckButton _shaderCacheToggle; + [GUI] CheckButton _textureRecompressionToggle; [GUI] CheckButton _ptcToggle; [GUI] CheckButton _internetToggle; [GUI] CheckButton _fsicToggle; @@ -91,6 +93,8 @@ namespace Ryujinx.Ui.Windows [GUI] Entry _graphicsShadersDumpPath; [GUI] ComboBoxText _anisotropy; [GUI] ComboBoxText _aspectRatio; + [GUI] ComboBoxText _graphicsBackend; + [GUI] ComboBoxText _preferredGpu; [GUI] ComboBoxText _resScaleCombo; [GUI] Entry _resScaleText; [GUI] ToggleButton _configureController1; @@ -229,6 +233,11 @@ namespace Ryujinx.Ui.Windows _shaderCacheToggle.Click(); } + if (ConfigurationState.Instance.Graphics.EnableTextureRecompression) + { + _textureRecompressionToggle.Click(); + } + if (ConfigurationState.Instance.System.EnablePtc) { _ptcToggle.Click(); @@ -321,6 +330,11 @@ namespace Ryujinx.Ui.Windows _resScaleCombo.SetActiveId(ConfigurationState.Instance.Graphics.ResScale.Value.ToString()); _anisotropy.SetActiveId(ConfigurationState.Instance.Graphics.MaxAnisotropy.Value.ToString()); _aspectRatio.SetActiveId(((int)ConfigurationState.Instance.Graphics.AspectRatio.Value).ToString()); + 
_graphicsBackend.SetActiveId(((int)ConfigurationState.Instance.Graphics.GraphicsBackend.Value).ToString()); + + UpdatePreferredGpuComboBox(); + + _graphicsBackend.Changed += (sender, e) => UpdatePreferredGpuComboBox(); _custThemePath.Buffer.Text = ConfigurationState.Instance.Ui.CustomThemePath; _resScaleText.Buffer.Text = ConfigurationState.Instance.Graphics.ResScaleCustom.Value.ToString(); @@ -345,7 +359,7 @@ namespace Ryujinx.Ui.Windows _browseThemePath.Sensitive = false; } - //Setup system time spinners + // Setup system time spinners UpdateSystemTimeSpinners(); _audioBackendStore = new ListStore(typeof(string), typeof(AudioBackend)); @@ -419,6 +433,39 @@ namespace Ryujinx.Ui.Windows }); } + private void UpdatePreferredGpuComboBox() + { + _preferredGpu.RemoveAll(); + + if (Enum.Parse(_graphicsBackend.ActiveId) == GraphicsBackend.Vulkan) + { + var devices = VulkanGraphicsDevice.GetPhysicalDevices(); + string preferredGpuIdFromConfig = ConfigurationState.Instance.Graphics.PreferredGpu.Value; + string preferredGpuId = preferredGpuIdFromConfig; + bool noGpuId = string.IsNullOrEmpty(preferredGpuIdFromConfig); + + foreach (var device in devices) + { + string dGPU = device.IsDiscrete ? " (dGPU)" : ""; + _preferredGpu.Append(device.Id, $"{device.Name}{dGPU}"); + + // If there's no GPU selected yet, we just pick the first GPU. + // If there's a discrete GPU available, we always prefer that over the previous selection, + // as it is likely to have better performance and more features. + // If the configuration file already has a GPU selection, we always prefer that instead. 
+ if (noGpuId && (string.IsNullOrEmpty(preferredGpuId) || device.IsDiscrete)) + { + preferredGpuId = device.Id; + } + } + + if (!string.IsNullOrEmpty(preferredGpuId)) + { + _preferredGpu.SetActiveId(preferredGpuId); + } + } + } + private void UpdateSystemTimeSpinners() { //Bind system time events @@ -492,45 +539,48 @@ namespace Ryujinx.Ui.Windows DriverUtilities.ToggleOGLThreading(backendThreading == BackendThreading.Off); } - ConfigurationState.Instance.Logger.EnableError.Value = _errorLogToggle.Active; - ConfigurationState.Instance.Logger.EnableTrace.Value = _traceLogToggle.Active; - ConfigurationState.Instance.Logger.EnableWarn.Value = _warningLogToggle.Active; - ConfigurationState.Instance.Logger.EnableInfo.Value = _infoLogToggle.Active; - ConfigurationState.Instance.Logger.EnableStub.Value = _stubLogToggle.Active; - ConfigurationState.Instance.Logger.EnableDebug.Value = _debugLogToggle.Active; - ConfigurationState.Instance.Logger.EnableGuest.Value = _guestLogToggle.Active; - ConfigurationState.Instance.Logger.EnableFsAccessLog.Value = _fsAccessLogToggle.Active; - ConfigurationState.Instance.Logger.EnableFileLog.Value = _fileLogToggle.Active; - ConfigurationState.Instance.Logger.GraphicsDebugLevel.Value = Enum.Parse(_graphicsDebugLevel.ActiveId); - ConfigurationState.Instance.System.EnableDockedMode.Value = _dockedModeToggle.Active; - ConfigurationState.Instance.EnableDiscordIntegration.Value = _discordToggle.Active; - ConfigurationState.Instance.CheckUpdatesOnStart.Value = _checkUpdatesToggle.Active; - ConfigurationState.Instance.ShowConfirmExit.Value = _showConfirmExitToggle.Active; - ConfigurationState.Instance.HideCursorOnIdle.Value = _hideCursorOnIdleToggle.Active; - ConfigurationState.Instance.Graphics.EnableVsync.Value = _vSyncToggle.Active; - ConfigurationState.Instance.Graphics.EnableShaderCache.Value = _shaderCacheToggle.Active; - ConfigurationState.Instance.System.EnablePtc.Value = _ptcToggle.Active; - 
ConfigurationState.Instance.System.EnableInternetAccess.Value = _internetToggle.Active; - ConfigurationState.Instance.System.EnableFsIntegrityChecks.Value = _fsicToggle.Active; - ConfigurationState.Instance.System.MemoryManagerMode.Value = memoryMode; - ConfigurationState.Instance.System.ExpandRam.Value = _expandRamToggle.Active; - ConfigurationState.Instance.System.IgnoreMissingServices.Value = _ignoreToggle.Active; - ConfigurationState.Instance.Hid.EnableKeyboard.Value = _directKeyboardAccess.Active; - ConfigurationState.Instance.Hid.EnableMouse.Value = _directMouseAccess.Active; - ConfigurationState.Instance.Ui.EnableCustomTheme.Value = _custThemeToggle.Active; - ConfigurationState.Instance.System.Language.Value = Enum.Parse(_systemLanguageSelect.ActiveId); - ConfigurationState.Instance.System.Region.Value = Enum.Parse(_systemRegionSelect.ActiveId); - ConfigurationState.Instance.System.SystemTimeOffset.Value = _systemTimeOffset; - ConfigurationState.Instance.Ui.CustomThemePath.Value = _custThemePath.Buffer.Text; - ConfigurationState.Instance.Graphics.ShadersDumpPath.Value = _graphicsShadersDumpPath.Buffer.Text; - ConfigurationState.Instance.Ui.GameDirs.Value = gameDirs; - ConfigurationState.Instance.System.FsGlobalAccessLogMode.Value = (int)_fsLogSpinAdjustment.Value; - ConfigurationState.Instance.Graphics.MaxAnisotropy.Value = float.Parse(_anisotropy.ActiveId, CultureInfo.InvariantCulture); - ConfigurationState.Instance.Graphics.AspectRatio.Value = Enum.Parse(_aspectRatio.ActiveId); - ConfigurationState.Instance.Graphics.BackendThreading.Value = backendThreading; - ConfigurationState.Instance.Graphics.ResScale.Value = int.Parse(_resScaleCombo.ActiveId); - ConfigurationState.Instance.Graphics.ResScaleCustom.Value = resScaleCustom; - ConfigurationState.Instance.System.AudioVolume.Value = (float)_audioVolumeSlider.Value / 100.0f; + ConfigurationState.Instance.Logger.EnableError.Value = _errorLogToggle.Active; + ConfigurationState.Instance.Logger.EnableTrace.Value 
= _traceLogToggle.Active; + ConfigurationState.Instance.Logger.EnableWarn.Value = _warningLogToggle.Active; + ConfigurationState.Instance.Logger.EnableInfo.Value = _infoLogToggle.Active; + ConfigurationState.Instance.Logger.EnableStub.Value = _stubLogToggle.Active; + ConfigurationState.Instance.Logger.EnableDebug.Value = _debugLogToggle.Active; + ConfigurationState.Instance.Logger.EnableGuest.Value = _guestLogToggle.Active; + ConfigurationState.Instance.Logger.EnableFsAccessLog.Value = _fsAccessLogToggle.Active; + ConfigurationState.Instance.Logger.EnableFileLog.Value = _fileLogToggle.Active; + ConfigurationState.Instance.Logger.GraphicsDebugLevel.Value = Enum.Parse(_graphicsDebugLevel.ActiveId); + ConfigurationState.Instance.System.EnableDockedMode.Value = _dockedModeToggle.Active; + ConfigurationState.Instance.EnableDiscordIntegration.Value = _discordToggle.Active; + ConfigurationState.Instance.CheckUpdatesOnStart.Value = _checkUpdatesToggle.Active; + ConfigurationState.Instance.ShowConfirmExit.Value = _showConfirmExitToggle.Active; + ConfigurationState.Instance.HideCursorOnIdle.Value = _hideCursorOnIdleToggle.Active; + ConfigurationState.Instance.Graphics.EnableVsync.Value = _vSyncToggle.Active; + ConfigurationState.Instance.Graphics.EnableShaderCache.Value = _shaderCacheToggle.Active; + ConfigurationState.Instance.Graphics.EnableTextureRecompression.Value = _textureRecompressionToggle.Active; + ConfigurationState.Instance.System.EnablePtc.Value = _ptcToggle.Active; + ConfigurationState.Instance.System.EnableInternetAccess.Value = _internetToggle.Active; + ConfigurationState.Instance.System.EnableFsIntegrityChecks.Value = _fsicToggle.Active; + ConfigurationState.Instance.System.MemoryManagerMode.Value = memoryMode; + ConfigurationState.Instance.System.ExpandRam.Value = _expandRamToggle.Active; + ConfigurationState.Instance.System.IgnoreMissingServices.Value = _ignoreToggle.Active; + ConfigurationState.Instance.Hid.EnableKeyboard.Value = 
_directKeyboardAccess.Active; + ConfigurationState.Instance.Hid.EnableMouse.Value = _directMouseAccess.Active; + ConfigurationState.Instance.Ui.EnableCustomTheme.Value = _custThemeToggle.Active; + ConfigurationState.Instance.System.Language.Value = Enum.Parse(_systemLanguageSelect.ActiveId); + ConfigurationState.Instance.System.Region.Value = Enum.Parse(_systemRegionSelect.ActiveId); + ConfigurationState.Instance.System.SystemTimeOffset.Value = _systemTimeOffset; + ConfigurationState.Instance.Ui.CustomThemePath.Value = _custThemePath.Buffer.Text; + ConfigurationState.Instance.Graphics.ShadersDumpPath.Value = _graphicsShadersDumpPath.Buffer.Text; + ConfigurationState.Instance.Ui.GameDirs.Value = gameDirs; + ConfigurationState.Instance.System.FsGlobalAccessLogMode.Value = (int)_fsLogSpinAdjustment.Value; + ConfigurationState.Instance.Graphics.MaxAnisotropy.Value = float.Parse(_anisotropy.ActiveId, CultureInfo.InvariantCulture); + ConfigurationState.Instance.Graphics.AspectRatio.Value = Enum.Parse(_aspectRatio.ActiveId); + ConfigurationState.Instance.Graphics.BackendThreading.Value = backendThreading; + ConfigurationState.Instance.Graphics.GraphicsBackend.Value = Enum.Parse(_graphicsBackend.ActiveId); + ConfigurationState.Instance.Graphics.PreferredGpu.Value = _preferredGpu.ActiveId; + ConfigurationState.Instance.Graphics.ResScale.Value = int.Parse(_resScaleCombo.ActiveId); + ConfigurationState.Instance.Graphics.ResScaleCustom.Value = resScaleCustom; + ConfigurationState.Instance.System.AudioVolume.Value = (float)_audioVolumeSlider.Value / 100.0f; _previousVolumeLevel = ConfigurationState.Instance.System.AudioVolume.Value; diff --git a/Ryujinx/Ui/Windows/SettingsWindow.glade b/Ryujinx/Ui/Windows/SettingsWindow.glade index 16f06aa49..d3b4f777b 100644 --- a/Ryujinx/Ui/Windows/SettingsWindow.glade +++ b/Ryujinx/Ui/Windows/SettingsWindow.glade @@ -1893,6 +1893,51 @@ + + False + True + 5 + 0 + + + + + True + False + 5 + 5 + + + True + False + Graphics Backend to use + 
Graphics Backend: + + + False + True + 5 + 0 + + + + + True + False + Graphics Backend to use + -1 + + Vulkan + OpenGL + + + + False + True + 1 + + + False True @@ -1900,6 +1945,47 @@ 1 + + + True + False + 5 + 5 + + + True + False + Preferred GPU (Vulkan only) + Preferred GPU: + + + False + True + 5 + 0 + + + + + True + False + Preferred GPU (Vulkan only) + -1 + + + False + True + 1 + + + + + False + True + 5 + 2 + + False @@ -1966,6 +2052,24 @@ 0 + + + Enable Texture Recompression + True + True + False + Enables or disables Texture Recompression. Reduces VRAM usage at the cost of texture quality, and may also increase stuttering + start + 5 + 5 + True + + + False + True + 1 + + True @@ -2027,7 +2131,7 @@ False True 5 - 1 + 2 @@ -2075,7 +2179,7 @@ False True 5 - 1 + 3 @@ -2124,7 +2228,7 @@ False True 5 - 3 + 4 @@ -2139,7 +2243,7 @@ False True 5 - 0 + 2 @@ -2151,7 +2255,7 @@ False True 5 - 1 + 3 @@ -2525,7 +2629,7 @@ True False Requires appropriate log levels enabled. - OpenGL Log Level + Graphics Backend Log Level False diff --git a/Spv.Generator/Autogenerated/CoreGrammar.cs b/Spv.Generator/Autogenerated/CoreGrammar.cs new file mode 100644 index 000000000..3b2f6fa65 --- /dev/null +++ b/Spv.Generator/Autogenerated/CoreGrammar.cs @@ -0,0 +1,5315 @@ +// AUTOGENERATED: DO NOT EDIT +// Last update date: 2021-01-06 23:02:26.837899 +#region Grammar License +// Copyright (c) 2014-2020 The Khronos Group Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. 
+#endregion + +using static Spv.Specification; + +namespace Spv.Generator +{ + public partial class Module + { + // Miscellaneous + + public Instruction Nop() + { + Instruction result = NewInstruction(Op.OpNop); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Undef(Instruction resultType) + { + Instruction result = NewInstruction(Op.OpUndef, GetNewId(), resultType); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SizeOf(Instruction resultType, Instruction pointer) + { + Instruction result = NewInstruction(Op.OpSizeOf, GetNewId(), resultType); + + result.AddOperand(pointer); + AddToFunctionDefinitions(result); + + return result; + } + + // Debug + + public Instruction SourceContinued(string continuedSource) + { + Instruction result = NewInstruction(Op.OpSourceContinued); + + result.AddOperand(continuedSource); + AddDebug(result); + + return result; + } + + public Instruction Source(SourceLanguage sourceLanguage, LiteralInteger version, Instruction file = null, string source = null) + { + Instruction result = NewInstruction(Op.OpSource); + + result.AddOperand(sourceLanguage); + result.AddOperand(version); + if (file != null) + { + result.AddOperand(file); + } + if (source != null) + { + result.AddOperand(source); + } + AddDebug(result); + + return result; + } + + public Instruction SourceExtension(string extension) + { + Instruction result = NewInstruction(Op.OpSourceExtension); + + result.AddOperand(extension); + AddDebug(result); + + return result; + } + + public Instruction Name(Instruction target, string name) + { + Instruction result = NewInstruction(Op.OpName); + + result.AddOperand(target); + result.AddOperand(name); + AddDebug(result); + + return result; + } + + public Instruction MemberName(Instruction type, LiteralInteger member, string name) + { + Instruction result = NewInstruction(Op.OpMemberName); + + result.AddOperand(type); + result.AddOperand(member); + result.AddOperand(name); + 
AddDebug(result); + + return result; + } + + public Instruction String(string str) + { + Instruction result = NewInstruction(Op.OpString, GetNewId()); + + result.AddOperand(str); + AddDebug(result); + + return result; + } + + public Instruction Line(Instruction file, LiteralInteger line, LiteralInteger column) + { + Instruction result = NewInstruction(Op.OpLine); + + result.AddOperand(file); + result.AddOperand(line); + result.AddOperand(column); + AddDebug(result); + + return result; + } + + public Instruction NoLine() + { + Instruction result = NewInstruction(Op.OpNoLine); + + AddDebug(result); + + return result; + } + + public Instruction ModuleProcessed(string process) + { + Instruction result = NewInstruction(Op.OpModuleProcessed); + + result.AddOperand(process); + AddDebug(result); + + return result; + } + + // Annotation + + public Instruction Decorate(Instruction target, Decoration decoration) + { + Instruction result = NewInstruction(Op.OpDecorate); + + result.AddOperand(target); + result.AddOperand(decoration); + AddAnnotation(result); + + return result; + } + + public Instruction Decorate(Instruction target, Decoration decoration, Operand parameter) + { + Instruction result = NewInstruction(Op.OpDecorate); + + result.AddOperand(target); + result.AddOperand(decoration); + result.AddOperand(parameter); + AddAnnotation(result); + + return result; + } + + public Instruction Decorate(Instruction target, Decoration decoration, params Operand[] parameters) + { + Instruction result = NewInstruction(Op.OpDecorate); + + result.AddOperand(target); + result.AddOperand(decoration); + result.AddOperand(parameters); + AddAnnotation(result); + + return result; + } + + public Instruction MemberDecorate(Instruction structureType, LiteralInteger member, Decoration decoration) + { + Instruction result = NewInstruction(Op.OpMemberDecorate); + + result.AddOperand(structureType); + result.AddOperand(member); + result.AddOperand(decoration); + AddAnnotation(result); + + return 
result; + } + + public Instruction MemberDecorate(Instruction structureType, LiteralInteger member, Decoration decoration, Operand parameter) + { + Instruction result = NewInstruction(Op.OpMemberDecorate); + + result.AddOperand(structureType); + result.AddOperand(member); + result.AddOperand(decoration); + result.AddOperand(parameter); + AddAnnotation(result); + + return result; + } + + public Instruction MemberDecorate(Instruction structureType, LiteralInteger member, Decoration decoration, params Operand[] parameters) + { + Instruction result = NewInstruction(Op.OpMemberDecorate); + + result.AddOperand(structureType); + result.AddOperand(member); + result.AddOperand(decoration); + result.AddOperand(parameters); + AddAnnotation(result); + + return result; + } + + public Instruction DecorationGroup() + { + Instruction result = NewInstruction(Op.OpDecorationGroup, GetNewId()); + + AddAnnotation(result); + + return result; + } + + public Instruction GroupDecorate(Instruction decorationGroup, params Instruction[] targets) + { + Instruction result = NewInstruction(Op.OpGroupDecorate); + + result.AddOperand(decorationGroup); + result.AddOperand(targets); + AddAnnotation(result); + + return result; + } + + public Instruction GroupMemberDecorate(Instruction decorationGroup, params Operand[] targets) + { + Instruction result = NewInstruction(Op.OpGroupMemberDecorate); + + result.AddOperand(decorationGroup); + result.AddOperand(targets); + AddAnnotation(result); + + return result; + } + + public Instruction DecorateId(Instruction target, Decoration decoration, params Operand[] parameters) + { + Instruction result = NewInstruction(Op.OpDecorateId); + + result.AddOperand(target); + result.AddOperand(decoration); + result.AddOperand(parameters); + AddAnnotation(result); + + return result; + } + + public Instruction DecorateString(Instruction target, Decoration decoration, params Operand[] parameters) + { + Instruction result = NewInstruction(Op.OpDecorateString); + + 
result.AddOperand(target); + result.AddOperand(decoration); + result.AddOperand(parameters); + AddAnnotation(result); + + return result; + } + + public Instruction DecorateStringGOOGLE(Instruction target, Decoration decoration, params Operand[] parameters) + { + Instruction result = NewInstruction(Op.OpDecorateStringGOOGLE); + + result.AddOperand(target); + result.AddOperand(decoration); + result.AddOperand(parameters); + AddAnnotation(result); + + return result; + } + + public Instruction MemberDecorateString(Instruction structType, LiteralInteger member, Decoration decoration, params Operand[] parameters) + { + Instruction result = NewInstruction(Op.OpMemberDecorateString); + + result.AddOperand(structType); + result.AddOperand(member); + result.AddOperand(decoration); + result.AddOperand(parameters); + AddAnnotation(result); + + return result; + } + + public Instruction MemberDecorateStringGOOGLE(Instruction structType, LiteralInteger member, Decoration decoration, params Operand[] parameters) + { + Instruction result = NewInstruction(Op.OpMemberDecorateStringGOOGLE); + + result.AddOperand(structType); + result.AddOperand(member); + result.AddOperand(decoration); + result.AddOperand(parameters); + AddAnnotation(result); + + return result; + } + + // Type-Declaration + + public Instruction TypeVoid(bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeVoid); + + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeBool(bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeBool); + + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeInt(LiteralInteger width, LiteralInteger signedness, bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeInt); + + result.AddOperand(width); + result.AddOperand(signedness); + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public 
Instruction TypeFloat(LiteralInteger width, bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeFloat); + + result.AddOperand(width); + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeVector(Instruction componentType, LiteralInteger componentCount, bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeVector); + + result.AddOperand(componentType); + result.AddOperand(componentCount); + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeMatrix(Instruction columnType, LiteralInteger columnCount, bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeMatrix); + + result.AddOperand(columnType); + result.AddOperand(columnCount); + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeImage(Instruction sampledType, Dim dim, LiteralInteger depth, LiteralInteger arrayed, LiteralInteger mS, LiteralInteger sampled, ImageFormat imageFormat, AccessQualifier accessQualifier = (AccessQualifier)int.MaxValue, bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeImage); + + result.AddOperand(sampledType); + result.AddOperand(dim); + result.AddOperand(depth); + result.AddOperand(arrayed); + result.AddOperand(mS); + result.AddOperand(sampled); + result.AddOperand(imageFormat); + if (accessQualifier != (AccessQualifier)int.MaxValue) + { + result.AddOperand(accessQualifier); + } + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeSampler(bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeSampler); + + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeSampledImage(Instruction imageType, bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeSampledImage); + + 
result.AddOperand(imageType); + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeArray(Instruction elementType, Instruction length, bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeArray); + + result.AddOperand(elementType); + result.AddOperand(length); + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeRuntimeArray(Instruction elementType, bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeRuntimeArray); + + result.AddOperand(elementType); + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeStruct(bool forceIdAllocation, params Instruction[] parameters) + { + Instruction result = NewInstruction(Op.OpTypeStruct); + + result.AddOperand(parameters); + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeOpaque(string thenameoftheopaquetype, bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeOpaque); + + result.AddOperand(thenameoftheopaquetype); + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypePointer(StorageClass storageClass, Instruction type, bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypePointer); + + result.AddOperand(storageClass); + result.AddOperand(type); + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeFunction(Instruction returnType, bool forceIdAllocation, params Instruction[] parameters) + { + Instruction result = NewInstruction(Op.OpTypeFunction); + + result.AddOperand(returnType); + result.AddOperand(parameters); + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeEvent(bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeEvent); + + AddTypeDeclaration(result, 
forceIdAllocation); + + return result; + } + + public Instruction TypeDeviceEvent(bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeDeviceEvent); + + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeReserveId(bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeReserveId); + + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeQueue(bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeQueue); + + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypePipe(AccessQualifier qualifier, bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypePipe); + + result.AddOperand(qualifier); + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeForwardPointer(Instruction pointerType, StorageClass storageClass, bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeForwardPointer); + + result.AddOperand(pointerType); + result.AddOperand(storageClass); + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypePipeStorage(bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypePipeStorage); + + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + public Instruction TypeNamedBarrier(bool forceIdAllocation = false) + { + Instruction result = NewInstruction(Op.OpTypeNamedBarrier); + + AddTypeDeclaration(result, forceIdAllocation); + + return result; + } + + // Constant-Creation + + public Instruction ConstantTrue(Instruction resultType) + { + Instruction result = NewInstruction(Op.OpConstantTrue, Instruction.InvalidId, resultType); + + AddConstant(result); + + return result; + } + + public Instruction ConstantFalse(Instruction resultType) + { + Instruction result = 
NewInstruction(Op.OpConstantFalse, Instruction.InvalidId, resultType); + + AddConstant(result); + + return result; + } + + public Instruction Constant(Instruction resultType, LiteralInteger value) + { + Instruction result = NewInstruction(Op.OpConstant, Instruction.InvalidId, resultType); + + result.AddOperand(value); + AddConstant(result); + + return result; + } + + public Instruction ConstantComposite(Instruction resultType, params Instruction[] constituents) + { + Instruction result = NewInstruction(Op.OpConstantComposite, Instruction.InvalidId, resultType); + + result.AddOperand(constituents); + AddConstant(result); + + return result; + } + + public Instruction ConstantSampler(Instruction resultType, SamplerAddressingMode samplerAddressingMode, LiteralInteger param, SamplerFilterMode samplerFilterMode) + { + Instruction result = NewInstruction(Op.OpConstantSampler, Instruction.InvalidId, resultType); + + result.AddOperand(samplerAddressingMode); + result.AddOperand(param); + result.AddOperand(samplerFilterMode); + AddConstant(result); + + return result; + } + + public Instruction ConstantNull(Instruction resultType) + { + Instruction result = NewInstruction(Op.OpConstantNull, Instruction.InvalidId, resultType); + + AddConstant(result); + + return result; + } + + public Instruction SpecConstantTrue(Instruction resultType) + { + Instruction result = NewInstruction(Op.OpSpecConstantTrue, GetNewId(), resultType); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SpecConstantFalse(Instruction resultType) + { + Instruction result = NewInstruction(Op.OpSpecConstantFalse, GetNewId(), resultType); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SpecConstant(Instruction resultType, LiteralInteger value) + { + Instruction result = NewInstruction(Op.OpSpecConstant, GetNewId(), resultType); + + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction 
SpecConstantComposite(Instruction resultType, params Instruction[] constituents) + { + Instruction result = NewInstruction(Op.OpSpecConstantComposite, GetNewId(), resultType); + + result.AddOperand(constituents); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SpecConstantOp(Instruction resultType, LiteralInteger opcode) + { + Instruction result = NewInstruction(Op.OpSpecConstantOp, GetNewId(), resultType); + + result.AddOperand(opcode); + AddToFunctionDefinitions(result); + + return result; + } + + // Memory + + public Instruction Variable(Instruction resultType, StorageClass storageClass, Instruction initializer = null) + { + Instruction result = NewInstruction(Op.OpVariable, GetNewId(), resultType); + + result.AddOperand(storageClass); + if (initializer != null) + { + result.AddOperand(initializer); + } + return result; + } + + public Instruction ImageTexelPointer(Instruction resultType, Instruction image, Instruction coordinate, Instruction sample) + { + Instruction result = NewInstruction(Op.OpImageTexelPointer, GetNewId(), resultType); + + result.AddOperand(image); + result.AddOperand(coordinate); + result.AddOperand(sample); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Load(Instruction resultType, Instruction pointer, MemoryAccessMask memoryAccess = (MemoryAccessMask)int.MaxValue) + { + Instruction result = NewInstruction(Op.OpLoad, GetNewId(), resultType); + + result.AddOperand(pointer); + if (memoryAccess != (MemoryAccessMask)int.MaxValue) + { + result.AddOperand(memoryAccess); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Store(Instruction pointer, Instruction obj, MemoryAccessMask memoryAccess = (MemoryAccessMask)int.MaxValue) + { + Instruction result = NewInstruction(Op.OpStore); + + result.AddOperand(pointer); + result.AddOperand(obj); + if (memoryAccess != (MemoryAccessMask)int.MaxValue) + { + result.AddOperand(memoryAccess); + } + 
AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CopyMemory(Instruction target, Instruction source, MemoryAccessMask memoryAccess0 = (MemoryAccessMask)int.MaxValue, MemoryAccessMask memoryAccess1 = (MemoryAccessMask)int.MaxValue) + { + Instruction result = NewInstruction(Op.OpCopyMemory); + + result.AddOperand(target); + result.AddOperand(source); + if (memoryAccess0 != (MemoryAccessMask)int.MaxValue) + { + result.AddOperand(memoryAccess0); + } + if (memoryAccess1 != (MemoryAccessMask)int.MaxValue) + { + result.AddOperand(memoryAccess1); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CopyMemorySized(Instruction target, Instruction source, Instruction size, MemoryAccessMask memoryAccess0 = (MemoryAccessMask)int.MaxValue, MemoryAccessMask memoryAccess1 = (MemoryAccessMask)int.MaxValue) + { + Instruction result = NewInstruction(Op.OpCopyMemorySized); + + result.AddOperand(target); + result.AddOperand(source); + result.AddOperand(size); + if (memoryAccess0 != (MemoryAccessMask)int.MaxValue) + { + result.AddOperand(memoryAccess0); + } + if (memoryAccess1 != (MemoryAccessMask)int.MaxValue) + { + result.AddOperand(memoryAccess1); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AccessChain(Instruction resultType, Instruction baseObj, Instruction index) + { + Instruction result = NewInstruction(Op.OpAccessChain, GetNewId(), resultType); + + result.AddOperand(baseObj); + result.AddOperand(index); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AccessChain(Instruction resultType, Instruction baseObj, Instruction index0, Instruction index1) + { + Instruction result = NewInstruction(Op.OpAccessChain, GetNewId(), resultType); + + result.AddOperand(baseObj); + result.AddOperand(index0); + result.AddOperand(index1); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AccessChain(Instruction resultType, 
Instruction baseObj, Instruction index0, Instruction index1, Instruction index2) + { + Instruction result = NewInstruction(Op.OpAccessChain, GetNewId(), resultType); + + result.AddOperand(baseObj); + result.AddOperand(index0); + result.AddOperand(index1); + result.AddOperand(index2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AccessChain(Instruction resultType, Instruction baseObj, params Instruction[] indexes) + { + Instruction result = NewInstruction(Op.OpAccessChain, GetNewId(), resultType); + + result.AddOperand(baseObj); + result.AddOperand(indexes); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction InBoundsAccessChain(Instruction resultType, Instruction baseObj, params Instruction[] indexes) + { + Instruction result = NewInstruction(Op.OpInBoundsAccessChain, GetNewId(), resultType); + + result.AddOperand(baseObj); + result.AddOperand(indexes); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction PtrAccessChain(Instruction resultType, Instruction baseObj, Instruction element, params Instruction[] indexes) + { + Instruction result = NewInstruction(Op.OpPtrAccessChain, GetNewId(), resultType); + + result.AddOperand(baseObj); + result.AddOperand(element); + result.AddOperand(indexes); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ArrayLength(Instruction resultType, Instruction structure, LiteralInteger arraymember) + { + Instruction result = NewInstruction(Op.OpArrayLength, GetNewId(), resultType); + + result.AddOperand(structure); + result.AddOperand(arraymember); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GenericPtrMemSemantics(Instruction resultType, Instruction pointer) + { + Instruction result = NewInstruction(Op.OpGenericPtrMemSemantics, GetNewId(), resultType); + + result.AddOperand(pointer); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction 
InBoundsPtrAccessChain(Instruction resultType, Instruction baseObj, Instruction element, params Instruction[] indexes) + { + Instruction result = NewInstruction(Op.OpInBoundsPtrAccessChain, GetNewId(), resultType); + + result.AddOperand(baseObj); + result.AddOperand(element); + result.AddOperand(indexes); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction PtrEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpPtrEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction PtrNotEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpPtrNotEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction PtrDiff(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpPtrDiff, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + // Function + + public Instruction Function(Instruction resultType, FunctionControlMask functionControl, Instruction functionType) + { + Instruction result = NewInstruction(Op.OpFunction, GetNewId(), resultType); + + result.AddOperand(functionControl); + result.AddOperand(functionType); + + return result; + } + + public void AddFunction(Instruction function) + { + AddToFunctionDefinitions(function); + } + + public Instruction FunctionParameter(Instruction resultType) + { + Instruction result = NewInstruction(Op.OpFunctionParameter, GetNewId(), resultType); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FunctionEnd() + { + Instruction result = 
NewInstruction(Op.OpFunctionEnd); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FunctionCall(Instruction resultType, Instruction function, params Instruction[] parameters) + { + Instruction result = NewInstruction(Op.OpFunctionCall, GetNewId(), resultType); + + result.AddOperand(function); + result.AddOperand(parameters); + AddToFunctionDefinitions(result); + + return result; + } + + // Image + + public Instruction SampledImage(Instruction resultType, Instruction image, Instruction sampler) + { + Instruction result = NewInstruction(Op.OpSampledImage, GetNewId(), resultType); + + result.AddOperand(image); + result.AddOperand(sampler); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSampleImplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSampleImplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSampleExplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSampleExplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(imageOperands); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSampleDrefImplicitLod(Instruction resultType, Instruction 
sampledImage, Instruction coordinate, Instruction dRef, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSampleDrefImplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(dRef); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSampleDrefExplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, Instruction dRef, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSampleDrefExplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(dRef); + result.AddOperand(imageOperands); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSampleProjImplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSampleProjImplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSampleProjExplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, ImageOperandsMask imageOperands, params Instruction[] 
imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSampleProjExplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(imageOperands); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSampleProjDrefImplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, Instruction dRef, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSampleProjDrefImplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(dRef); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSampleProjDrefExplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, Instruction dRef, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSampleProjDrefExplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(dRef); + result.AddOperand(imageOperands); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageFetch(Instruction resultType, Instruction image, Instruction coordinate, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageFetch, GetNewId(), resultType); + + result.AddOperand(image); + result.AddOperand(coordinate); + 
if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageGather(Instruction resultType, Instruction sampledImage, Instruction coordinate, Instruction component, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageGather, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(component); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageDrefGather(Instruction resultType, Instruction sampledImage, Instruction coordinate, Instruction dRef, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageDrefGather, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(dRef); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageRead(Instruction resultType, Instruction image, Instruction coordinate, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageRead, GetNewId(), resultType); + + result.AddOperand(image); + result.AddOperand(coordinate); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != 
(ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageWrite(Instruction image, Instruction coordinate, Instruction texel, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageWrite); + + result.AddOperand(image); + result.AddOperand(coordinate); + result.AddOperand(texel); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Image(Instruction resultType, Instruction sampledImage) + { + Instruction result = NewInstruction(Op.OpImage, GetNewId(), resultType); + + result.AddOperand(sampledImage); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageQueryFormat(Instruction resultType, Instruction image) + { + Instruction result = NewInstruction(Op.OpImageQueryFormat, GetNewId(), resultType); + + result.AddOperand(image); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageQueryOrder(Instruction resultType, Instruction image) + { + Instruction result = NewInstruction(Op.OpImageQueryOrder, GetNewId(), resultType); + + result.AddOperand(image); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageQuerySizeLod(Instruction resultType, Instruction image, Instruction levelofDetail) + { + Instruction result = NewInstruction(Op.OpImageQuerySizeLod, GetNewId(), resultType); + + result.AddOperand(image); + result.AddOperand(levelofDetail); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageQuerySize(Instruction resultType, Instruction image) + { + Instruction result = NewInstruction(Op.OpImageQuerySize, GetNewId(), resultType); + + 
result.AddOperand(image); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageQueryLod(Instruction resultType, Instruction sampledImage, Instruction coordinate) + { + Instruction result = NewInstruction(Op.OpImageQueryLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageQueryLevels(Instruction resultType, Instruction image) + { + Instruction result = NewInstruction(Op.OpImageQueryLevels, GetNewId(), resultType); + + result.AddOperand(image); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageQuerySamples(Instruction resultType, Instruction image) + { + Instruction result = NewInstruction(Op.OpImageQuerySamples, GetNewId(), resultType); + + result.AddOperand(image); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSparseSampleImplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSparseSampleImplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSparseSampleExplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSparseSampleExplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(imageOperands); + if (imageOperands != 
(ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSparseSampleDrefImplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, Instruction dRef, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSparseSampleDrefImplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(dRef); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSparseSampleDrefExplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, Instruction dRef, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSparseSampleDrefExplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(dRef); + result.AddOperand(imageOperands); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSparseSampleProjImplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSparseSampleProjImplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + 
result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSparseSampleProjExplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSparseSampleProjExplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(imageOperands); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSparseSampleProjDrefImplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, Instruction dRef, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSparseSampleProjDrefImplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(dRef); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSparseSampleProjDrefExplicitLod(Instruction resultType, Instruction sampledImage, Instruction coordinate, Instruction dRef, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSparseSampleProjDrefExplicitLod, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(dRef); + result.AddOperand(imageOperands); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + 
public Instruction ImageSparseFetch(Instruction resultType, Instruction image, Instruction coordinate, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSparseFetch, GetNewId(), resultType); + + result.AddOperand(image); + result.AddOperand(coordinate); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSparseGather(Instruction resultType, Instruction sampledImage, Instruction coordinate, Instruction component, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSparseGather, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(component); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSparseDrefGather(Instruction resultType, Instruction sampledImage, Instruction coordinate, Instruction dRef, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSparseDrefGather, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(dRef); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSparseTexelsResident(Instruction resultType, Instruction 
residentCode) + { + Instruction result = NewInstruction(Op.OpImageSparseTexelsResident, GetNewId(), resultType); + + result.AddOperand(residentCode); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSparseRead(Instruction resultType, Instruction image, Instruction coordinate, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSparseRead, GetNewId(), resultType); + + result.AddOperand(image); + result.AddOperand(coordinate); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ImageSampleFootprintNV(Instruction resultType, Instruction sampledImage, Instruction coordinate, Instruction granularity, Instruction coarse, ImageOperandsMask imageOperands, params Instruction[] imageOperandIds) + { + Instruction result = NewInstruction(Op.OpImageSampleFootprintNV, GetNewId(), resultType); + + result.AddOperand(sampledImage); + result.AddOperand(coordinate); + result.AddOperand(granularity); + result.AddOperand(coarse); + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperands); + } + if (imageOperands != (ImageOperandsMask)int.MaxValue) + { + result.AddOperand(imageOperandIds); + } + AddToFunctionDefinitions(result); + + return result; + } + + // Conversion + + public Instruction ConvertFToU(Instruction resultType, Instruction floatValue) + { + Instruction result = NewInstruction(Op.OpConvertFToU, GetNewId(), resultType); + + result.AddOperand(floatValue); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ConvertFToS(Instruction resultType, Instruction floatValue) + { + Instruction result = NewInstruction(Op.OpConvertFToS, GetNewId(), resultType); + + 
result.AddOperand(floatValue); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ConvertSToF(Instruction resultType, Instruction signedValue) + { + Instruction result = NewInstruction(Op.OpConvertSToF, GetNewId(), resultType); + + result.AddOperand(signedValue); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ConvertUToF(Instruction resultType, Instruction unsignedValue) + { + Instruction result = NewInstruction(Op.OpConvertUToF, GetNewId(), resultType); + + result.AddOperand(unsignedValue); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction UConvert(Instruction resultType, Instruction unsignedValue) + { + Instruction result = NewInstruction(Op.OpUConvert, GetNewId(), resultType); + + result.AddOperand(unsignedValue); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SConvert(Instruction resultType, Instruction signedValue) + { + Instruction result = NewInstruction(Op.OpSConvert, GetNewId(), resultType); + + result.AddOperand(signedValue); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FConvert(Instruction resultType, Instruction floatValue) + { + Instruction result = NewInstruction(Op.OpFConvert, GetNewId(), resultType); + + result.AddOperand(floatValue); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction QuantizeToF16(Instruction resultType, Instruction value) + { + Instruction result = NewInstruction(Op.OpQuantizeToF16, GetNewId(), resultType); + + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ConvertPtrToU(Instruction resultType, Instruction pointer) + { + Instruction result = NewInstruction(Op.OpConvertPtrToU, GetNewId(), resultType); + + result.AddOperand(pointer); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SatConvertSToU(Instruction resultType, Instruction 
signedValue) + { + Instruction result = NewInstruction(Op.OpSatConvertSToU, GetNewId(), resultType); + + result.AddOperand(signedValue); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SatConvertUToS(Instruction resultType, Instruction unsignedValue) + { + Instruction result = NewInstruction(Op.OpSatConvertUToS, GetNewId(), resultType); + + result.AddOperand(unsignedValue); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ConvertUToPtr(Instruction resultType, Instruction integerValue) + { + Instruction result = NewInstruction(Op.OpConvertUToPtr, GetNewId(), resultType); + + result.AddOperand(integerValue); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction PtrCastToGeneric(Instruction resultType, Instruction pointer) + { + Instruction result = NewInstruction(Op.OpPtrCastToGeneric, GetNewId(), resultType); + + result.AddOperand(pointer); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GenericCastToPtr(Instruction resultType, Instruction pointer) + { + Instruction result = NewInstruction(Op.OpGenericCastToPtr, GetNewId(), resultType); + + result.AddOperand(pointer); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GenericCastToPtrExplicit(Instruction resultType, Instruction pointer, StorageClass storage) + { + Instruction result = NewInstruction(Op.OpGenericCastToPtrExplicit, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(storage); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Bitcast(Instruction resultType, Instruction operand) + { + Instruction result = NewInstruction(Op.OpBitcast, GetNewId(), resultType); + + result.AddOperand(operand); + AddToFunctionDefinitions(result); + + return result; + } + + // Composite + + public Instruction VectorExtractDynamic(Instruction resultType, Instruction vector, Instruction index) + { + Instruction 
result = NewInstruction(Op.OpVectorExtractDynamic, GetNewId(), resultType); + + result.AddOperand(vector); + result.AddOperand(index); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction VectorInsertDynamic(Instruction resultType, Instruction vector, Instruction component, Instruction index) + { + Instruction result = NewInstruction(Op.OpVectorInsertDynamic, GetNewId(), resultType); + + result.AddOperand(vector); + result.AddOperand(component); + result.AddOperand(index); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction VectorShuffle(Instruction resultType, Instruction vector1, Instruction vector2, params LiteralInteger[] components) + { + Instruction result = NewInstruction(Op.OpVectorShuffle, GetNewId(), resultType); + + result.AddOperand(vector1); + result.AddOperand(vector2); + result.AddOperand(components); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CompositeConstruct(Instruction resultType, params Instruction[] constituents) + { + Instruction result = NewInstruction(Op.OpCompositeConstruct, GetNewId(), resultType); + + result.AddOperand(constituents); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CompositeExtract(Instruction resultType, Instruction composite, params LiteralInteger[] indexes) + { + Instruction result = NewInstruction(Op.OpCompositeExtract, GetNewId(), resultType); + + result.AddOperand(composite); + result.AddOperand(indexes); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CompositeInsert(Instruction resultType, Instruction obj, Instruction composite, params LiteralInteger[] indexes) + { + Instruction result = NewInstruction(Op.OpCompositeInsert, GetNewId(), resultType); + + result.AddOperand(obj); + result.AddOperand(composite); + result.AddOperand(indexes); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CopyObject(Instruction resultType, 
Instruction operand) + { + Instruction result = NewInstruction(Op.OpCopyObject, GetNewId(), resultType); + + result.AddOperand(operand); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Transpose(Instruction resultType, Instruction matrix) + { + Instruction result = NewInstruction(Op.OpTranspose, GetNewId(), resultType); + + result.AddOperand(matrix); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CopyLogical(Instruction resultType, Instruction operand) + { + Instruction result = NewInstruction(Op.OpCopyLogical, GetNewId(), resultType); + + result.AddOperand(operand); + AddToFunctionDefinitions(result); + + return result; + } + + // Arithmetic + + public Instruction SNegate(Instruction resultType, Instruction operand) + { + Instruction result = NewInstruction(Op.OpSNegate, GetNewId(), resultType); + + result.AddOperand(operand); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FNegate(Instruction resultType, Instruction operand) + { + Instruction result = NewInstruction(Op.OpFNegate, GetNewId(), resultType); + + result.AddOperand(operand); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IAdd(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpIAdd, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FAdd(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFAdd, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ISub(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpISub, GetNewId(), resultType); + + 
result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FSub(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFSub, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IMul(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpIMul, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FMul(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFMul, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction UDiv(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpUDiv, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SDiv(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpSDiv, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FDiv(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFDiv, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction UMod(Instruction resultType, Instruction operand1, Instruction operand2) + 
{ + Instruction result = NewInstruction(Op.OpUMod, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SRem(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpSRem, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SMod(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpSMod, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FRem(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFRem, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FMod(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFMod, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction VectorTimesScalar(Instruction resultType, Instruction vector, Instruction scalar) + { + Instruction result = NewInstruction(Op.OpVectorTimesScalar, GetNewId(), resultType); + + result.AddOperand(vector); + result.AddOperand(scalar); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction MatrixTimesScalar(Instruction resultType, Instruction matrix, Instruction scalar) + { + Instruction result = NewInstruction(Op.OpMatrixTimesScalar, GetNewId(), resultType); + + result.AddOperand(matrix); + result.AddOperand(scalar); + AddToFunctionDefinitions(result); + + 
return result; + } + + public Instruction VectorTimesMatrix(Instruction resultType, Instruction vector, Instruction matrix) + { + Instruction result = NewInstruction(Op.OpVectorTimesMatrix, GetNewId(), resultType); + + result.AddOperand(vector); + result.AddOperand(matrix); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction MatrixTimesVector(Instruction resultType, Instruction matrix, Instruction vector) + { + Instruction result = NewInstruction(Op.OpMatrixTimesVector, GetNewId(), resultType); + + result.AddOperand(matrix); + result.AddOperand(vector); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction MatrixTimesMatrix(Instruction resultType, Instruction leftMatrix, Instruction rightMatrix) + { + Instruction result = NewInstruction(Op.OpMatrixTimesMatrix, GetNewId(), resultType); + + result.AddOperand(leftMatrix); + result.AddOperand(rightMatrix); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction OuterProduct(Instruction resultType, Instruction vector1, Instruction vector2) + { + Instruction result = NewInstruction(Op.OpOuterProduct, GetNewId(), resultType); + + result.AddOperand(vector1); + result.AddOperand(vector2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Dot(Instruction resultType, Instruction vector1, Instruction vector2) + { + Instruction result = NewInstruction(Op.OpDot, GetNewId(), resultType); + + result.AddOperand(vector1); + result.AddOperand(vector2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IAddCarry(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpIAddCarry, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ISubBorrow(Instruction resultType, Instruction operand1, Instruction operand2) + { + 
Instruction result = NewInstruction(Op.OpISubBorrow, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction UMulExtended(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpUMulExtended, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SMulExtended(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpSMulExtended, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + // Bit + + public Instruction ShiftRightLogical(Instruction resultType, Instruction baseObj, Instruction shift) + { + Instruction result = NewInstruction(Op.OpShiftRightLogical, GetNewId(), resultType); + + result.AddOperand(baseObj); + result.AddOperand(shift); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ShiftRightArithmetic(Instruction resultType, Instruction baseObj, Instruction shift) + { + Instruction result = NewInstruction(Op.OpShiftRightArithmetic, GetNewId(), resultType); + + result.AddOperand(baseObj); + result.AddOperand(shift); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ShiftLeftLogical(Instruction resultType, Instruction baseObj, Instruction shift) + { + Instruction result = NewInstruction(Op.OpShiftLeftLogical, GetNewId(), resultType); + + result.AddOperand(baseObj); + result.AddOperand(shift); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction BitwiseOr(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpBitwiseOr, GetNewId(), resultType); + + 
result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction BitwiseXor(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpBitwiseXor, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction BitwiseAnd(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpBitwiseAnd, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Not(Instruction resultType, Instruction operand) + { + Instruction result = NewInstruction(Op.OpNot, GetNewId(), resultType); + + result.AddOperand(operand); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction BitFieldInsert(Instruction resultType, Instruction baseObj, Instruction insert, Instruction offset, Instruction count) + { + Instruction result = NewInstruction(Op.OpBitFieldInsert, GetNewId(), resultType); + + result.AddOperand(baseObj); + result.AddOperand(insert); + result.AddOperand(offset); + result.AddOperand(count); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction BitFieldSExtract(Instruction resultType, Instruction baseObj, Instruction offset, Instruction count) + { + Instruction result = NewInstruction(Op.OpBitFieldSExtract, GetNewId(), resultType); + + result.AddOperand(baseObj); + result.AddOperand(offset); + result.AddOperand(count); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction BitFieldUExtract(Instruction resultType, Instruction baseObj, Instruction offset, Instruction count) + { + Instruction result = NewInstruction(Op.OpBitFieldUExtract, GetNewId(), resultType); + + result.AddOperand(baseObj); + 
result.AddOperand(offset); + result.AddOperand(count); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction BitReverse(Instruction resultType, Instruction baseObj) + { + Instruction result = NewInstruction(Op.OpBitReverse, GetNewId(), resultType); + + result.AddOperand(baseObj); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction BitCount(Instruction resultType, Instruction baseObj) + { + Instruction result = NewInstruction(Op.OpBitCount, GetNewId(), resultType); + + result.AddOperand(baseObj); + AddToFunctionDefinitions(result); + + return result; + } + + // Relational_and_Logical + + public Instruction Any(Instruction resultType, Instruction vector) + { + Instruction result = NewInstruction(Op.OpAny, GetNewId(), resultType); + + result.AddOperand(vector); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction All(Instruction resultType, Instruction vector) + { + Instruction result = NewInstruction(Op.OpAll, GetNewId(), resultType); + + result.AddOperand(vector); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IsNan(Instruction resultType, Instruction x) + { + Instruction result = NewInstruction(Op.OpIsNan, GetNewId(), resultType); + + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IsInf(Instruction resultType, Instruction x) + { + Instruction result = NewInstruction(Op.OpIsInf, GetNewId(), resultType); + + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IsFinite(Instruction resultType, Instruction x) + { + Instruction result = NewInstruction(Op.OpIsFinite, GetNewId(), resultType); + + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IsNormal(Instruction resultType, Instruction x) + { + Instruction result = NewInstruction(Op.OpIsNormal, GetNewId(), resultType); + + 
result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SignBitSet(Instruction resultType, Instruction x) + { + Instruction result = NewInstruction(Op.OpSignBitSet, GetNewId(), resultType); + + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction LessOrGreater(Instruction resultType, Instruction x, Instruction y) + { + Instruction result = NewInstruction(Op.OpLessOrGreater, GetNewId(), resultType); + + result.AddOperand(x); + result.AddOperand(y); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Ordered(Instruction resultType, Instruction x, Instruction y) + { + Instruction result = NewInstruction(Op.OpOrdered, GetNewId(), resultType); + + result.AddOperand(x); + result.AddOperand(y); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Unordered(Instruction resultType, Instruction x, Instruction y) + { + Instruction result = NewInstruction(Op.OpUnordered, GetNewId(), resultType); + + result.AddOperand(x); + result.AddOperand(y); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction LogicalEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpLogicalEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction LogicalNotEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpLogicalNotEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction LogicalOr(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpLogicalOr, GetNewId(), resultType); + + 
result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction LogicalAnd(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpLogicalAnd, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction LogicalNot(Instruction resultType, Instruction operand) + { + Instruction result = NewInstruction(Op.OpLogicalNot, GetNewId(), resultType); + + result.AddOperand(operand); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Select(Instruction resultType, Instruction condition, Instruction object1, Instruction object2) + { + Instruction result = NewInstruction(Op.OpSelect, GetNewId(), resultType); + + result.AddOperand(condition); + result.AddOperand(object1); + result.AddOperand(object2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpIEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction INotEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpINotEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction UGreaterThan(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpUGreaterThan, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction 
SGreaterThan(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpSGreaterThan, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction UGreaterThanEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpUGreaterThanEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SGreaterThanEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpSGreaterThanEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ULessThan(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpULessThan, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SLessThan(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpSLessThan, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ULessThanEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpULessThanEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SLessThanEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = 
NewInstruction(Op.OpSLessThanEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FOrdEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFOrdEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FUnordEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFUnordEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FOrdNotEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFOrdNotEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FUnordNotEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFUnordNotEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FOrdLessThan(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFOrdLessThan, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FUnordLessThan(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFUnordLessThan, GetNewId(), resultType); + + result.AddOperand(operand1); + 
result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FOrdGreaterThan(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFOrdGreaterThan, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FUnordGreaterThan(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFUnordGreaterThan, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FOrdLessThanEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFOrdLessThanEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FUnordLessThanEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFUnordLessThanEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FOrdGreaterThanEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFOrdGreaterThanEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FUnordGreaterThanEqual(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpFUnordGreaterThanEqual, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + 
AddToFunctionDefinitions(result); + + return result; + } + + // Derivative + + public Instruction DPdx(Instruction resultType, Instruction p) + { + Instruction result = NewInstruction(Op.OpDPdx, GetNewId(), resultType); + + result.AddOperand(p); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction DPdy(Instruction resultType, Instruction p) + { + Instruction result = NewInstruction(Op.OpDPdy, GetNewId(), resultType); + + result.AddOperand(p); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Fwidth(Instruction resultType, Instruction p) + { + Instruction result = NewInstruction(Op.OpFwidth, GetNewId(), resultType); + + result.AddOperand(p); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction DPdxFine(Instruction resultType, Instruction p) + { + Instruction result = NewInstruction(Op.OpDPdxFine, GetNewId(), resultType); + + result.AddOperand(p); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction DPdyFine(Instruction resultType, Instruction p) + { + Instruction result = NewInstruction(Op.OpDPdyFine, GetNewId(), resultType); + + result.AddOperand(p); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FwidthFine(Instruction resultType, Instruction p) + { + Instruction result = NewInstruction(Op.OpFwidthFine, GetNewId(), resultType); + + result.AddOperand(p); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction DPdxCoarse(Instruction resultType, Instruction p) + { + Instruction result = NewInstruction(Op.OpDPdxCoarse, GetNewId(), resultType); + + result.AddOperand(p); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction DPdyCoarse(Instruction resultType, Instruction p) + { + Instruction result = NewInstruction(Op.OpDPdyCoarse, GetNewId(), resultType); + + result.AddOperand(p); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction 
FwidthCoarse(Instruction resultType, Instruction p) + { + Instruction result = NewInstruction(Op.OpFwidthCoarse, GetNewId(), resultType); + + result.AddOperand(p); + AddToFunctionDefinitions(result); + + return result; + } + + // Control-Flow + + public Instruction Phi(Instruction resultType, params Instruction[] parameters) + { + Instruction result = NewInstruction(Op.OpPhi, GetNewId(), resultType); + + result.AddOperand(parameters); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction LoopMerge(Instruction mergeBlock, Instruction continueTarget, LoopControlMask loopControl) + { + Instruction result = NewInstruction(Op.OpLoopMerge); + + result.AddOperand(mergeBlock); + result.AddOperand(continueTarget); + result.AddOperand(loopControl); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SelectionMerge(Instruction mergeBlock, SelectionControlMask selectionControl) + { + Instruction result = NewInstruction(Op.OpSelectionMerge); + + result.AddOperand(mergeBlock); + result.AddOperand(selectionControl); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Label() + { + Instruction result = NewInstruction(Op.OpLabel); + + return result; + } + + public Instruction Branch(Instruction targetLabel) + { + Instruction result = NewInstruction(Op.OpBranch); + + result.AddOperand(targetLabel); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction BranchConditional(Instruction condition, Instruction trueLabel, Instruction falseLabel, params LiteralInteger[] branchweights) + { + Instruction result = NewInstruction(Op.OpBranchConditional); + + result.AddOperand(condition); + result.AddOperand(trueLabel); + result.AddOperand(falseLabel); + result.AddOperand(branchweights); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Switch(Instruction selector, Instruction defaultObj, params Operand[] target) + { + Instruction result = 
NewInstruction(Op.OpSwitch); + + result.AddOperand(selector); + result.AddOperand(defaultObj); + result.AddOperand(target); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Kill() + { + Instruction result = NewInstruction(Op.OpKill); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Return() + { + Instruction result = NewInstruction(Op.OpReturn); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ReturnValue(Instruction value) + { + Instruction result = NewInstruction(Op.OpReturnValue); + + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction Unreachable() + { + Instruction result = NewInstruction(Op.OpUnreachable); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction LifetimeStart(Instruction pointer, LiteralInteger size) + { + Instruction result = NewInstruction(Op.OpLifetimeStart); + + result.AddOperand(pointer); + result.AddOperand(size); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction LifetimeStop(Instruction pointer, LiteralInteger size) + { + Instruction result = NewInstruction(Op.OpLifetimeStop); + + result.AddOperand(pointer); + result.AddOperand(size); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction TerminateInvocation() + { + Instruction result = NewInstruction(Op.OpTerminateInvocation); + + AddToFunctionDefinitions(result); + + return result; + } + + // Atomic + + public Instruction AtomicLoad(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics) + { + Instruction result = NewInstruction(Op.OpAtomicLoad, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(semantics); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicStore(Instruction pointer, Instruction memory, Instruction 
semantics, Instruction value) + { + Instruction result = NewInstruction(Op.OpAtomicStore); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(semantics); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicExchange(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics, Instruction value) + { + Instruction result = NewInstruction(Op.OpAtomicExchange, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(semantics); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicCompareExchange(Instruction resultType, Instruction pointer, Instruction memory, Instruction equal, Instruction unequal, Instruction value, Instruction comparator) + { + Instruction result = NewInstruction(Op.OpAtomicCompareExchange, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(equal); + result.AddOperand(unequal); + result.AddOperand(value); + result.AddOperand(comparator); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicCompareExchangeWeak(Instruction resultType, Instruction pointer, Instruction memory, Instruction equal, Instruction unequal, Instruction value, Instruction comparator) + { + Instruction result = NewInstruction(Op.OpAtomicCompareExchangeWeak, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(equal); + result.AddOperand(unequal); + result.AddOperand(value); + result.AddOperand(comparator); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicIIncrement(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics) + { + Instruction result = NewInstruction(Op.OpAtomicIIncrement, GetNewId(), resultType); + + result.AddOperand(pointer); + 
result.AddOperand(memory); + result.AddOperand(semantics); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicIDecrement(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics) + { + Instruction result = NewInstruction(Op.OpAtomicIDecrement, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(semantics); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicIAdd(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics, Instruction value) + { + Instruction result = NewInstruction(Op.OpAtomicIAdd, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(semantics); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicISub(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics, Instruction value) + { + Instruction result = NewInstruction(Op.OpAtomicISub, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(semantics); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicSMin(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics, Instruction value) + { + Instruction result = NewInstruction(Op.OpAtomicSMin, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(semantics); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicUMin(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics, Instruction value) + { + Instruction result = NewInstruction(Op.OpAtomicUMin, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + 
result.AddOperand(semantics); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicSMax(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics, Instruction value) + { + Instruction result = NewInstruction(Op.OpAtomicSMax, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(semantics); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicUMax(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics, Instruction value) + { + Instruction result = NewInstruction(Op.OpAtomicUMax, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(semantics); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicAnd(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics, Instruction value) + { + Instruction result = NewInstruction(Op.OpAtomicAnd, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(semantics); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicOr(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics, Instruction value) + { + Instruction result = NewInstruction(Op.OpAtomicOr, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(semantics); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicXor(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics, Instruction value) + { + Instruction result = NewInstruction(Op.OpAtomicXor, GetNewId(), resultType); + + result.AddOperand(pointer); + 
result.AddOperand(memory); + result.AddOperand(semantics); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicFlagTestAndSet(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics) + { + Instruction result = NewInstruction(Op.OpAtomicFlagTestAndSet, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(semantics); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicFlagClear(Instruction pointer, Instruction memory, Instruction semantics) + { + Instruction result = NewInstruction(Op.OpAtomicFlagClear); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(semantics); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AtomicFAddEXT(Instruction resultType, Instruction pointer, Instruction memory, Instruction semantics, Instruction value) + { + Instruction result = NewInstruction(Op.OpAtomicFAddEXT, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(memory); + result.AddOperand(semantics); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + // Primitive + + public Instruction EmitVertex() + { + Instruction result = NewInstruction(Op.OpEmitVertex); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction EndPrimitive() + { + Instruction result = NewInstruction(Op.OpEndPrimitive); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction EmitStreamVertex(Instruction stream) + { + Instruction result = NewInstruction(Op.OpEmitStreamVertex); + + result.AddOperand(stream); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction EndStreamPrimitive(Instruction stream) + { + Instruction result = NewInstruction(Op.OpEndStreamPrimitive); + + result.AddOperand(stream); + 
AddToFunctionDefinitions(result); + + return result; + } + + // Barrier + + public Instruction ControlBarrier(Instruction execution, Instruction memory, Instruction semantics) + { + Instruction result = NewInstruction(Op.OpControlBarrier); + + result.AddOperand(execution); + result.AddOperand(memory); + result.AddOperand(semantics); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction MemoryBarrier(Instruction memory, Instruction semantics) + { + Instruction result = NewInstruction(Op.OpMemoryBarrier); + + result.AddOperand(memory); + result.AddOperand(semantics); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction NamedBarrierInitialize(Instruction resultType, Instruction subgroupCount) + { + Instruction result = NewInstruction(Op.OpNamedBarrierInitialize, GetNewId(), resultType); + + result.AddOperand(subgroupCount); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction MemoryNamedBarrier(Instruction namedBarrier, Instruction memory, Instruction semantics) + { + Instruction result = NewInstruction(Op.OpMemoryNamedBarrier); + + result.AddOperand(namedBarrier); + result.AddOperand(memory); + result.AddOperand(semantics); + AddToFunctionDefinitions(result); + + return result; + } + + // Group + + public Instruction GroupAsyncCopy(Instruction resultType, Instruction execution, Instruction destination, Instruction source, Instruction numElements, Instruction stride, Instruction eventObj) + { + Instruction result = NewInstruction(Op.OpGroupAsyncCopy, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(destination); + result.AddOperand(source); + result.AddOperand(numElements); + result.AddOperand(stride); + result.AddOperand(eventObj); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupWaitEvents(Instruction execution, Instruction numEvents, Instruction eventsList) + { + Instruction result = 
NewInstruction(Op.OpGroupWaitEvents); + + result.AddOperand(execution); + result.AddOperand(numEvents); + result.AddOperand(eventsList); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupAll(Instruction resultType, Instruction execution, Instruction predicate) + { + Instruction result = NewInstruction(Op.OpGroupAll, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(predicate); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupAny(Instruction resultType, Instruction execution, Instruction predicate) + { + Instruction result = NewInstruction(Op.OpGroupAny, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(predicate); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupBroadcast(Instruction resultType, Instruction execution, Instruction value, Instruction localId) + { + Instruction result = NewInstruction(Op.OpGroupBroadcast, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(value); + result.AddOperand(localId); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupIAdd(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupIAdd, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupFAdd(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupFAdd, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupFMin(Instruction resultType, Instruction execution, GroupOperation operation, 
Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupFMin, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupUMin(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupUMin, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupSMin(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupSMin, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupFMax(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupFMax, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupUMax(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupUMax, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupSMax(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupSMax, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return 
result; + } + + public Instruction SubgroupBallotKHR(Instruction resultType, Instruction predicate) + { + Instruction result = NewInstruction(Op.OpSubgroupBallotKHR, GetNewId(), resultType); + + result.AddOperand(predicate); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupFirstInvocationKHR(Instruction resultType, Instruction value) + { + Instruction result = NewInstruction(Op.OpSubgroupFirstInvocationKHR, GetNewId(), resultType); + + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupAllKHR(Instruction resultType, Instruction predicate) + { + Instruction result = NewInstruction(Op.OpSubgroupAllKHR, GetNewId(), resultType); + + result.AddOperand(predicate); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupAnyKHR(Instruction resultType, Instruction predicate) + { + Instruction result = NewInstruction(Op.OpSubgroupAnyKHR, GetNewId(), resultType); + + result.AddOperand(predicate); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupAllEqualKHR(Instruction resultType, Instruction predicate) + { + Instruction result = NewInstruction(Op.OpSubgroupAllEqualKHR, GetNewId(), resultType); + + result.AddOperand(predicate); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupReadInvocationKHR(Instruction resultType, Instruction value, Instruction index) + { + Instruction result = NewInstruction(Op.OpSubgroupReadInvocationKHR, GetNewId(), resultType); + + result.AddOperand(value); + result.AddOperand(index); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupIAddNonUniformAMD(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupIAddNonUniformAMD, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + 
result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupFAddNonUniformAMD(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupFAddNonUniformAMD, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupFMinNonUniformAMD(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupFMinNonUniformAMD, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupUMinNonUniformAMD(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupUMinNonUniformAMD, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupSMinNonUniformAMD(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupSMinNonUniformAMD, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupFMaxNonUniformAMD(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupFMaxNonUniformAMD, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction 
GroupUMaxNonUniformAMD(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupUMaxNonUniformAMD, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupSMaxNonUniformAMD(Instruction resultType, Instruction execution, GroupOperation operation, Instruction x) + { + Instruction result = NewInstruction(Op.OpGroupSMaxNonUniformAMD, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(x); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupShuffleINTEL(Instruction resultType, Instruction data, Instruction invocationId) + { + Instruction result = NewInstruction(Op.OpSubgroupShuffleINTEL, GetNewId(), resultType); + + result.AddOperand(data); + result.AddOperand(invocationId); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupShuffleDownINTEL(Instruction resultType, Instruction current, Instruction next, Instruction delta) + { + Instruction result = NewInstruction(Op.OpSubgroupShuffleDownINTEL, GetNewId(), resultType); + + result.AddOperand(current); + result.AddOperand(next); + result.AddOperand(delta); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupShuffleUpINTEL(Instruction resultType, Instruction previous, Instruction current, Instruction delta) + { + Instruction result = NewInstruction(Op.OpSubgroupShuffleUpINTEL, GetNewId(), resultType); + + result.AddOperand(previous); + result.AddOperand(current); + result.AddOperand(delta); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupShuffleXorINTEL(Instruction resultType, Instruction data, Instruction value) + { + Instruction result = NewInstruction(Op.OpSubgroupShuffleXorINTEL, 
GetNewId(), resultType); + + result.AddOperand(data); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupBlockReadINTEL(Instruction resultType, Instruction ptr) + { + Instruction result = NewInstruction(Op.OpSubgroupBlockReadINTEL, GetNewId(), resultType); + + result.AddOperand(ptr); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupBlockWriteINTEL(Instruction ptr, Instruction data) + { + Instruction result = NewInstruction(Op.OpSubgroupBlockWriteINTEL); + + result.AddOperand(ptr); + result.AddOperand(data); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupImageBlockReadINTEL(Instruction resultType, Instruction image, Instruction coordinate) + { + Instruction result = NewInstruction(Op.OpSubgroupImageBlockReadINTEL, GetNewId(), resultType); + + result.AddOperand(image); + result.AddOperand(coordinate); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupImageBlockWriteINTEL(Instruction image, Instruction coordinate, Instruction data) + { + Instruction result = NewInstruction(Op.OpSubgroupImageBlockWriteINTEL); + + result.AddOperand(image); + result.AddOperand(coordinate); + result.AddOperand(data); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupImageMediaBlockReadINTEL(Instruction resultType, Instruction image, Instruction coordinate, Instruction width, Instruction height) + { + Instruction result = NewInstruction(Op.OpSubgroupImageMediaBlockReadINTEL, GetNewId(), resultType); + + result.AddOperand(image); + result.AddOperand(coordinate); + result.AddOperand(width); + result.AddOperand(height); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SubgroupImageMediaBlockWriteINTEL(Instruction image, Instruction coordinate, Instruction width, Instruction height, Instruction data) + { + Instruction result = 
NewInstruction(Op.OpSubgroupImageMediaBlockWriteINTEL); + + result.AddOperand(image); + result.AddOperand(coordinate); + result.AddOperand(width); + result.AddOperand(height); + result.AddOperand(data); + AddToFunctionDefinitions(result); + + return result; + } + + // Device-Side_Enqueue + + public Instruction EnqueueMarker(Instruction resultType, Instruction queue, Instruction numEvents, Instruction waitEvents, Instruction retEvent) + { + Instruction result = NewInstruction(Op.OpEnqueueMarker, GetNewId(), resultType); + + result.AddOperand(queue); + result.AddOperand(numEvents); + result.AddOperand(waitEvents); + result.AddOperand(retEvent); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction EnqueueKernel(Instruction resultType, Instruction queue, Instruction flags, Instruction nDRange, Instruction numEvents, Instruction waitEvents, Instruction retEvent, Instruction invoke, Instruction param, Instruction paramSize, Instruction paramAlign, params Instruction[] localSize) + { + Instruction result = NewInstruction(Op.OpEnqueueKernel, GetNewId(), resultType); + + result.AddOperand(queue); + result.AddOperand(flags); + result.AddOperand(nDRange); + result.AddOperand(numEvents); + result.AddOperand(waitEvents); + result.AddOperand(retEvent); + result.AddOperand(invoke); + result.AddOperand(param); + result.AddOperand(paramSize); + result.AddOperand(paramAlign); + result.AddOperand(localSize); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GetKernelNDrangeSubGroupCount(Instruction resultType, Instruction nDRange, Instruction invoke, Instruction param, Instruction paramSize, Instruction paramAlign) + { + Instruction result = NewInstruction(Op.OpGetKernelNDrangeSubGroupCount, GetNewId(), resultType); + + result.AddOperand(nDRange); + result.AddOperand(invoke); + result.AddOperand(param); + result.AddOperand(paramSize); + result.AddOperand(paramAlign); + AddToFunctionDefinitions(result); + + return result; + 
} + + public Instruction GetKernelNDrangeMaxSubGroupSize(Instruction resultType, Instruction nDRange, Instruction invoke, Instruction param, Instruction paramSize, Instruction paramAlign) + { + Instruction result = NewInstruction(Op.OpGetKernelNDrangeMaxSubGroupSize, GetNewId(), resultType); + + result.AddOperand(nDRange); + result.AddOperand(invoke); + result.AddOperand(param); + result.AddOperand(paramSize); + result.AddOperand(paramAlign); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GetKernelWorkGroupSize(Instruction resultType, Instruction invoke, Instruction param, Instruction paramSize, Instruction paramAlign) + { + Instruction result = NewInstruction(Op.OpGetKernelWorkGroupSize, GetNewId(), resultType); + + result.AddOperand(invoke); + result.AddOperand(param); + result.AddOperand(paramSize); + result.AddOperand(paramAlign); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GetKernelPreferredWorkGroupSizeMultiple(Instruction resultType, Instruction invoke, Instruction param, Instruction paramSize, Instruction paramAlign) + { + Instruction result = NewInstruction(Op.OpGetKernelPreferredWorkGroupSizeMultiple, GetNewId(), resultType); + + result.AddOperand(invoke); + result.AddOperand(param); + result.AddOperand(paramSize); + result.AddOperand(paramAlign); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RetainEvent(Instruction eventObj) + { + Instruction result = NewInstruction(Op.OpRetainEvent); + + result.AddOperand(eventObj); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ReleaseEvent(Instruction eventObj) + { + Instruction result = NewInstruction(Op.OpReleaseEvent); + + result.AddOperand(eventObj); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CreateUserEvent(Instruction resultType) + { + Instruction result = NewInstruction(Op.OpCreateUserEvent, GetNewId(), resultType); + + 
AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IsValidEvent(Instruction resultType, Instruction eventObj) + { + Instruction result = NewInstruction(Op.OpIsValidEvent, GetNewId(), resultType); + + result.AddOperand(eventObj); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction SetUserEventStatus(Instruction eventObj, Instruction status) + { + Instruction result = NewInstruction(Op.OpSetUserEventStatus); + + result.AddOperand(eventObj); + result.AddOperand(status); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CaptureEventProfilingInfo(Instruction eventObj, Instruction profilingInfo, Instruction value) + { + Instruction result = NewInstruction(Op.OpCaptureEventProfilingInfo); + + result.AddOperand(eventObj); + result.AddOperand(profilingInfo); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GetDefaultQueue(Instruction resultType) + { + Instruction result = NewInstruction(Op.OpGetDefaultQueue, GetNewId(), resultType); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction BuildNDRange(Instruction resultType, Instruction globalWorkSize, Instruction localWorkSize, Instruction globalWorkOffset) + { + Instruction result = NewInstruction(Op.OpBuildNDRange, GetNewId(), resultType); + + result.AddOperand(globalWorkSize); + result.AddOperand(localWorkSize); + result.AddOperand(globalWorkOffset); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GetKernelLocalSizeForSubgroupCount(Instruction resultType, Instruction subgroupCount, Instruction invoke, Instruction param, Instruction paramSize, Instruction paramAlign) + { + Instruction result = NewInstruction(Op.OpGetKernelLocalSizeForSubgroupCount, GetNewId(), resultType); + + result.AddOperand(subgroupCount); + result.AddOperand(invoke); + result.AddOperand(param); + result.AddOperand(paramSize); + 
result.AddOperand(paramAlign); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GetKernelMaxNumSubgroups(Instruction resultType, Instruction invoke, Instruction param, Instruction paramSize, Instruction paramAlign) + { + Instruction result = NewInstruction(Op.OpGetKernelMaxNumSubgroups, GetNewId(), resultType); + + result.AddOperand(invoke); + result.AddOperand(param); + result.AddOperand(paramSize); + result.AddOperand(paramAlign); + AddToFunctionDefinitions(result); + + return result; + } + + // Pipe + + public Instruction ReadPipe(Instruction resultType, Instruction pipe, Instruction pointer, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpReadPipe, GetNewId(), resultType); + + result.AddOperand(pipe); + result.AddOperand(pointer); + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction WritePipe(Instruction resultType, Instruction pipe, Instruction pointer, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpWritePipe, GetNewId(), resultType); + + result.AddOperand(pipe); + result.AddOperand(pointer); + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ReservedReadPipe(Instruction resultType, Instruction pipe, Instruction reserveId, Instruction index, Instruction pointer, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpReservedReadPipe, GetNewId(), resultType); + + result.AddOperand(pipe); + result.AddOperand(reserveId); + result.AddOperand(index); + result.AddOperand(pointer); + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ReservedWritePipe(Instruction resultType, Instruction 
pipe, Instruction reserveId, Instruction index, Instruction pointer, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpReservedWritePipe, GetNewId(), resultType); + + result.AddOperand(pipe); + result.AddOperand(reserveId); + result.AddOperand(index); + result.AddOperand(pointer); + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ReserveReadPipePackets(Instruction resultType, Instruction pipe, Instruction numPackets, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpReserveReadPipePackets, GetNewId(), resultType); + + result.AddOperand(pipe); + result.AddOperand(numPackets); + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ReserveWritePipePackets(Instruction resultType, Instruction pipe, Instruction numPackets, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpReserveWritePipePackets, GetNewId(), resultType); + + result.AddOperand(pipe); + result.AddOperand(numPackets); + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CommitReadPipe(Instruction pipe, Instruction reserveId, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpCommitReadPipe); + + result.AddOperand(pipe); + result.AddOperand(reserveId); + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CommitWritePipe(Instruction pipe, Instruction reserveId, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpCommitWritePipe); + + result.AddOperand(pipe); + 
result.AddOperand(reserveId); + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IsValidReserveId(Instruction resultType, Instruction reserveId) + { + Instruction result = NewInstruction(Op.OpIsValidReserveId, GetNewId(), resultType); + + result.AddOperand(reserveId); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GetNumPipePackets(Instruction resultType, Instruction pipe, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpGetNumPipePackets, GetNewId(), resultType); + + result.AddOperand(pipe); + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GetMaxPipePackets(Instruction resultType, Instruction pipe, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpGetMaxPipePackets, GetNewId(), resultType); + + result.AddOperand(pipe); + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupReserveReadPipePackets(Instruction resultType, Instruction execution, Instruction pipe, Instruction numPackets, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpGroupReserveReadPipePackets, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(pipe); + result.AddOperand(numPackets); + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupReserveWritePipePackets(Instruction resultType, Instruction execution, Instruction pipe, Instruction numPackets, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpGroupReserveWritePipePackets, 
GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(pipe); + result.AddOperand(numPackets); + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupCommitReadPipe(Instruction execution, Instruction pipe, Instruction reserveId, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpGroupCommitReadPipe); + + result.AddOperand(execution); + result.AddOperand(pipe); + result.AddOperand(reserveId); + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupCommitWritePipe(Instruction execution, Instruction pipe, Instruction reserveId, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpGroupCommitWritePipe); + + result.AddOperand(execution); + result.AddOperand(pipe); + result.AddOperand(reserveId); + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ConstantPipeStorage(Instruction resultType, LiteralInteger packetSize, LiteralInteger packetAlignment, LiteralInteger capacity) + { + Instruction result = NewInstruction(Op.OpConstantPipeStorage, GetNewId(), resultType); + + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + result.AddOperand(capacity); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CreatePipeFromPipeStorage(Instruction resultType, Instruction pipeStorage) + { + Instruction result = NewInstruction(Op.OpCreatePipeFromPipeStorage, GetNewId(), resultType); + + result.AddOperand(pipeStorage); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ReadPipeBlockingINTEL(Instruction resultType, Instruction packetSize, Instruction packetAlignment) + { + Instruction 
result = NewInstruction(Op.OpReadPipeBlockingINTEL, GetNewId(), resultType); + + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction WritePipeBlockingINTEL(Instruction resultType, Instruction packetSize, Instruction packetAlignment) + { + Instruction result = NewInstruction(Op.OpWritePipeBlockingINTEL, GetNewId(), resultType); + + result.AddOperand(packetSize); + result.AddOperand(packetAlignment); + AddToFunctionDefinitions(result); + + return result; + } + + // Non-Uniform + + public Instruction GroupNonUniformElect(Instruction resultType, Instruction execution) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformElect, GetNewId(), resultType); + + result.AddOperand(execution); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformAll(Instruction resultType, Instruction execution, Instruction predicate) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformAll, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(predicate); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformAny(Instruction resultType, Instruction execution, Instruction predicate) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformAny, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(predicate); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformAllEqual(Instruction resultType, Instruction execution, Instruction value) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformAllEqual, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformBroadcast(Instruction resultType, Instruction execution, Instruction value, Instruction id) + { + Instruction 
result = NewInstruction(Op.OpGroupNonUniformBroadcast, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(value); + result.AddOperand(id); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformBroadcastFirst(Instruction resultType, Instruction execution, Instruction value) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformBroadcastFirst, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformBallot(Instruction resultType, Instruction execution, Instruction predicate) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformBallot, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(predicate); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformInverseBallot(Instruction resultType, Instruction execution, Instruction value) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformInverseBallot, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformBallotBitExtract(Instruction resultType, Instruction execution, Instruction value, Instruction index) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformBallotBitExtract, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(value); + result.AddOperand(index); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformBallotBitCount(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformBallotBitCount, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + 
AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformBallotFindLSB(Instruction resultType, Instruction execution, Instruction value) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformBallotFindLSB, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformBallotFindMSB(Instruction resultType, Instruction execution, Instruction value) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformBallotFindMSB, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformShuffle(Instruction resultType, Instruction execution, Instruction value, Instruction id) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformShuffle, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(value); + result.AddOperand(id); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformShuffleXor(Instruction resultType, Instruction execution, Instruction value, Instruction mask) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformShuffleXor, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(value); + result.AddOperand(mask); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformShuffleUp(Instruction resultType, Instruction execution, Instruction value, Instruction delta) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformShuffleUp, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(value); + result.AddOperand(delta); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformShuffleDown(Instruction resultType, Instruction execution, Instruction value, Instruction 
delta) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformShuffleDown, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(value); + result.AddOperand(delta); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformIAdd(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformIAdd, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformFAdd(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformFAdd, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformIMul(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformIMul, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformFMul(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformFMul, GetNewId(), resultType); + + result.AddOperand(execution); + 
result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformSMin(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformSMin, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformUMin(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformUMin, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformFMin(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformFMin, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformSMax(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformSMax, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if 
(clusterSize != null) + { + result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformUMax(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformUMax, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformFMax(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformFMax, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformBitwiseAnd(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformBitwiseAnd, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformBitwiseOr(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformBitwiseOr, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + 
result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformBitwiseXor(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformBitwiseXor, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformLogicalAnd(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformLogicalAnd, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformLogicalOr(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformLogicalOr, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformLogicalXor(Instruction resultType, Instruction execution, GroupOperation operation, Instruction value, Instruction clusterSize = null) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformLogicalXor, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(operation); + result.AddOperand(value); + if (clusterSize != null) + { + 
result.AddOperand(clusterSize); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformQuadBroadcast(Instruction resultType, Instruction execution, Instruction value, Instruction index) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformQuadBroadcast, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(value); + result.AddOperand(index); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformQuadSwap(Instruction resultType, Instruction execution, Instruction value, Instruction direction) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformQuadSwap, GetNewId(), resultType); + + result.AddOperand(execution); + result.AddOperand(value); + result.AddOperand(direction); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction GroupNonUniformPartitionNV(Instruction resultType, Instruction value) + { + Instruction result = NewInstruction(Op.OpGroupNonUniformPartitionNV, GetNewId(), resultType); + + result.AddOperand(value); + AddToFunctionDefinitions(result); + + return result; + } + + // Reserved + + public Instruction TraceRayKHR(Instruction accel, Instruction rayFlags, Instruction cullMask, Instruction sBTOffset, Instruction sBTStride, Instruction missIndex, Instruction rayOrigin, Instruction rayTmin, Instruction rayDirection, Instruction rayTmax, Instruction payload) + { + Instruction result = NewInstruction(Op.OpTraceRayKHR); + + result.AddOperand(accel); + result.AddOperand(rayFlags); + result.AddOperand(cullMask); + result.AddOperand(sBTOffset); + result.AddOperand(sBTStride); + result.AddOperand(missIndex); + result.AddOperand(rayOrigin); + result.AddOperand(rayTmin); + result.AddOperand(rayDirection); + result.AddOperand(rayTmax); + result.AddOperand(payload); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ExecuteCallableKHR(Instruction sBTIndex, Instruction 
callableData) + { + Instruction result = NewInstruction(Op.OpExecuteCallableKHR); + + result.AddOperand(sBTIndex); + result.AddOperand(callableData); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ConvertUToAccelerationStructureKHR(Instruction resultType, Instruction accel) + { + Instruction result = NewInstruction(Op.OpConvertUToAccelerationStructureKHR, GetNewId(), resultType); + + result.AddOperand(accel); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IgnoreIntersectionKHR() + { + Instruction result = NewInstruction(Op.OpIgnoreIntersectionKHR); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction TerminateRayKHR() + { + Instruction result = NewInstruction(Op.OpTerminateRayKHR); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction TypeRayQueryKHR() + { + Instruction result = NewInstruction(Op.OpTypeRayQueryKHR, GetNewId()); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryInitializeKHR(Instruction rayQuery, Instruction accel, Instruction rayFlags, Instruction cullMask, Instruction rayOrigin, Instruction rayTMin, Instruction rayDirection, Instruction rayTMax) + { + Instruction result = NewInstruction(Op.OpRayQueryInitializeKHR); + + result.AddOperand(rayQuery); + result.AddOperand(accel); + result.AddOperand(rayFlags); + result.AddOperand(cullMask); + result.AddOperand(rayOrigin); + result.AddOperand(rayTMin); + result.AddOperand(rayDirection); + result.AddOperand(rayTMax); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryTerminateKHR(Instruction rayQuery) + { + Instruction result = NewInstruction(Op.OpRayQueryTerminateKHR); + + result.AddOperand(rayQuery); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGenerateIntersectionKHR(Instruction rayQuery, Instruction hitT) + { + Instruction result = 
NewInstruction(Op.OpRayQueryGenerateIntersectionKHR); + + result.AddOperand(rayQuery); + result.AddOperand(hitT); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryConfirmIntersectionKHR(Instruction rayQuery) + { + Instruction result = NewInstruction(Op.OpRayQueryConfirmIntersectionKHR); + + result.AddOperand(rayQuery); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryProceedKHR(Instruction resultType, Instruction rayQuery) + { + Instruction result = NewInstruction(Op.OpRayQueryProceedKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetIntersectionTypeKHR(Instruction resultType, Instruction rayQuery, Instruction intersection) + { + Instruction result = NewInstruction(Op.OpRayQueryGetIntersectionTypeKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + result.AddOperand(intersection); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FragmentMaskFetchAMD(Instruction resultType, Instruction image, Instruction coordinate) + { + Instruction result = NewInstruction(Op.OpFragmentMaskFetchAMD, GetNewId(), resultType); + + result.AddOperand(image); + result.AddOperand(coordinate); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FragmentFetchAMD(Instruction resultType, Instruction image, Instruction coordinate, Instruction fragmentIndex) + { + Instruction result = NewInstruction(Op.OpFragmentFetchAMD, GetNewId(), resultType); + + result.AddOperand(image); + result.AddOperand(coordinate); + result.AddOperand(fragmentIndex); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ReadClockKHR(Instruction resultType, Instruction execution) + { + Instruction result = NewInstruction(Op.OpReadClockKHR, GetNewId(), resultType); + + result.AddOperand(execution); + AddToFunctionDefinitions(result); + 
+ return result; + } + + public Instruction WritePackedPrimitiveIndices4x8NV(Instruction indexOffset, Instruction packedIndices) + { + Instruction result = NewInstruction(Op.OpWritePackedPrimitiveIndices4x8NV); + + result.AddOperand(indexOffset); + result.AddOperand(packedIndices); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ReportIntersectionNV(Instruction resultType, Instruction hit, Instruction hitKind) + { + Instruction result = NewInstruction(Op.OpReportIntersectionNV, GetNewId(), resultType); + + result.AddOperand(hit); + result.AddOperand(hitKind); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ReportIntersectionKHR(Instruction resultType, Instruction hit, Instruction hitKind) + { + Instruction result = NewInstruction(Op.OpReportIntersectionKHR, GetNewId(), resultType); + + result.AddOperand(hit); + result.AddOperand(hitKind); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IgnoreIntersectionNV() + { + Instruction result = NewInstruction(Op.OpIgnoreIntersectionNV); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction TerminateRayNV() + { + Instruction result = NewInstruction(Op.OpTerminateRayNV); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction TraceNV(Instruction accel, Instruction rayFlags, Instruction cullMask, Instruction sBTOffset, Instruction sBTStride, Instruction missIndex, Instruction rayOrigin, Instruction rayTmin, Instruction rayDirection, Instruction rayTmax, Instruction payloadId) + { + Instruction result = NewInstruction(Op.OpTraceNV); + + result.AddOperand(accel); + result.AddOperand(rayFlags); + result.AddOperand(cullMask); + result.AddOperand(sBTOffset); + result.AddOperand(sBTStride); + result.AddOperand(missIndex); + result.AddOperand(rayOrigin); + result.AddOperand(rayTmin); + result.AddOperand(rayDirection); + result.AddOperand(rayTmax); + 
result.AddOperand(payloadId); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction TypeAccelerationStructureNV() + { + Instruction result = NewInstruction(Op.OpTypeAccelerationStructureNV, GetNewId()); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction TypeAccelerationStructureKHR() + { + Instruction result = NewInstruction(Op.OpTypeAccelerationStructureKHR, GetNewId()); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ExecuteCallableNV(Instruction sBTIndex, Instruction callableDataId) + { + Instruction result = NewInstruction(Op.OpExecuteCallableNV); + + result.AddOperand(sBTIndex); + result.AddOperand(callableDataId); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction TypeCooperativeMatrixNV(Instruction componentType, Instruction execution, Instruction rows, Instruction columns) + { + Instruction result = NewInstruction(Op.OpTypeCooperativeMatrixNV, GetNewId()); + + result.AddOperand(componentType); + result.AddOperand(execution); + result.AddOperand(rows); + result.AddOperand(columns); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CooperativeMatrixLoadNV(Instruction resultType, Instruction pointer, Instruction stride, Instruction columnMajor, MemoryAccessMask memoryAccess = (MemoryAccessMask)int.MaxValue) + { + Instruction result = NewInstruction(Op.OpCooperativeMatrixLoadNV, GetNewId(), resultType); + + result.AddOperand(pointer); + result.AddOperand(stride); + result.AddOperand(columnMajor); + if (memoryAccess != (MemoryAccessMask)int.MaxValue) + { + result.AddOperand(memoryAccess); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CooperativeMatrixStoreNV(Instruction pointer, Instruction obj, Instruction stride, Instruction columnMajor, MemoryAccessMask memoryAccess = (MemoryAccessMask)int.MaxValue) + { + Instruction result = 
NewInstruction(Op.OpCooperativeMatrixStoreNV); + + result.AddOperand(pointer); + result.AddOperand(obj); + result.AddOperand(stride); + result.AddOperand(columnMajor); + if (memoryAccess != (MemoryAccessMask)int.MaxValue) + { + result.AddOperand(memoryAccess); + } + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CooperativeMatrixMulAddNV(Instruction resultType, Instruction a, Instruction b, Instruction c) + { + Instruction result = NewInstruction(Op.OpCooperativeMatrixMulAddNV, GetNewId(), resultType); + + result.AddOperand(a); + result.AddOperand(b); + result.AddOperand(c); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction CooperativeMatrixLengthNV(Instruction resultType, Instruction type) + { + Instruction result = NewInstruction(Op.OpCooperativeMatrixLengthNV, GetNewId(), resultType); + + result.AddOperand(type); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction BeginInvocationInterlockEXT() + { + Instruction result = NewInstruction(Op.OpBeginInvocationInterlockEXT); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction EndInvocationInterlockEXT() + { + Instruction result = NewInstruction(Op.OpEndInvocationInterlockEXT); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction DemoteToHelperInvocationEXT() + { + Instruction result = NewInstruction(Op.OpDemoteToHelperInvocationEXT); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IsHelperInvocationEXT(Instruction resultType) + { + Instruction result = NewInstruction(Op.OpIsHelperInvocationEXT, GetNewId(), resultType); + + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction UCountLeadingZerosINTEL(Instruction resultType, Instruction operand) + { + Instruction result = NewInstruction(Op.OpUCountLeadingZerosINTEL, GetNewId(), resultType); + + result.AddOperand(operand); + 
AddToFunctionDefinitions(result); + + return result; + } + + public Instruction UCountTrailingZerosINTEL(Instruction resultType, Instruction operand) + { + Instruction result = NewInstruction(Op.OpUCountTrailingZerosINTEL, GetNewId(), resultType); + + result.AddOperand(operand); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AbsISubINTEL(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpAbsISubINTEL, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction AbsUSubINTEL(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpAbsUSubINTEL, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IAddSatINTEL(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpIAddSatINTEL, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction UAddSatINTEL(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpUAddSatINTEL, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IAverageINTEL(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpIAverageINTEL, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction UAverageINTEL(Instruction resultType, Instruction operand1, 
Instruction operand2) + { + Instruction result = NewInstruction(Op.OpUAverageINTEL, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IAverageRoundedINTEL(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpIAverageRoundedINTEL, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction UAverageRoundedINTEL(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpUAverageRoundedINTEL, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction ISubSatINTEL(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpISubSatINTEL, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction USubSatINTEL(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpUSubSatINTEL, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction IMul32x16INTEL(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = NewInstruction(Op.OpIMul32x16INTEL, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction UMul32x16INTEL(Instruction resultType, Instruction operand1, Instruction operand2) + { + Instruction result = 
NewInstruction(Op.OpUMul32x16INTEL, GetNewId(), resultType); + + result.AddOperand(operand1); + result.AddOperand(operand2); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction LoopControlINTEL(params LiteralInteger[] loopControlParameters) + { + Instruction result = NewInstruction(Op.OpLoopControlINTEL); + + result.AddOperand(loopControlParameters); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction FPGARegINTEL(Instruction resultType, Instruction resultObj, Instruction input) + { + Instruction result = NewInstruction(Op.OpFPGARegINTEL, GetNewId(), resultType); + + result.AddOperand(resultObj); + result.AddOperand(input); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetRayTMinKHR(Instruction resultType, Instruction rayQuery) + { + Instruction result = NewInstruction(Op.OpRayQueryGetRayTMinKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetRayFlagsKHR(Instruction resultType, Instruction rayQuery) + { + Instruction result = NewInstruction(Op.OpRayQueryGetRayFlagsKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetIntersectionTKHR(Instruction resultType, Instruction rayQuery, Instruction intersection) + { + Instruction result = NewInstruction(Op.OpRayQueryGetIntersectionTKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + result.AddOperand(intersection); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetIntersectionInstanceCustomIndexKHR(Instruction resultType, Instruction rayQuery, Instruction intersection) + { + Instruction result = NewInstruction(Op.OpRayQueryGetIntersectionInstanceCustomIndexKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + result.AddOperand(intersection); + 
AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetIntersectionInstanceIdKHR(Instruction resultType, Instruction rayQuery, Instruction intersection) + { + Instruction result = NewInstruction(Op.OpRayQueryGetIntersectionInstanceIdKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + result.AddOperand(intersection); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR(Instruction resultType, Instruction rayQuery, Instruction intersection) + { + Instruction result = NewInstruction(Op.OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + result.AddOperand(intersection); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetIntersectionGeometryIndexKHR(Instruction resultType, Instruction rayQuery, Instruction intersection) + { + Instruction result = NewInstruction(Op.OpRayQueryGetIntersectionGeometryIndexKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + result.AddOperand(intersection); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetIntersectionPrimitiveIndexKHR(Instruction resultType, Instruction rayQuery, Instruction intersection) + { + Instruction result = NewInstruction(Op.OpRayQueryGetIntersectionPrimitiveIndexKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + result.AddOperand(intersection); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetIntersectionBarycentricsKHR(Instruction resultType, Instruction rayQuery, Instruction intersection) + { + Instruction result = NewInstruction(Op.OpRayQueryGetIntersectionBarycentricsKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + result.AddOperand(intersection); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction 
RayQueryGetIntersectionFrontFaceKHR(Instruction resultType, Instruction rayQuery, Instruction intersection) + { + Instruction result = NewInstruction(Op.OpRayQueryGetIntersectionFrontFaceKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + result.AddOperand(intersection); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetIntersectionCandidateAABBOpaqueKHR(Instruction resultType, Instruction rayQuery) + { + Instruction result = NewInstruction(Op.OpRayQueryGetIntersectionCandidateAABBOpaqueKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetIntersectionObjectRayDirectionKHR(Instruction resultType, Instruction rayQuery, Instruction intersection) + { + Instruction result = NewInstruction(Op.OpRayQueryGetIntersectionObjectRayDirectionKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + result.AddOperand(intersection); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetIntersectionObjectRayOriginKHR(Instruction resultType, Instruction rayQuery, Instruction intersection) + { + Instruction result = NewInstruction(Op.OpRayQueryGetIntersectionObjectRayOriginKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + result.AddOperand(intersection); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetWorldRayDirectionKHR(Instruction resultType, Instruction rayQuery) + { + Instruction result = NewInstruction(Op.OpRayQueryGetWorldRayDirectionKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetWorldRayOriginKHR(Instruction resultType, Instruction rayQuery) + { + Instruction result = NewInstruction(Op.OpRayQueryGetWorldRayOriginKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + 
AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetIntersectionObjectToWorldKHR(Instruction resultType, Instruction rayQuery, Instruction intersection) + { + Instruction result = NewInstruction(Op.OpRayQueryGetIntersectionObjectToWorldKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + result.AddOperand(intersection); + AddToFunctionDefinitions(result); + + return result; + } + + public Instruction RayQueryGetIntersectionWorldToObjectKHR(Instruction resultType, Instruction rayQuery, Instruction intersection) + { + Instruction result = NewInstruction(Op.OpRayQueryGetIntersectionWorldToObjectKHR, GetNewId(), resultType); + + result.AddOperand(rayQuery); + result.AddOperand(intersection); + AddToFunctionDefinitions(result); + + return result; + } + + } +} diff --git a/Spv.Generator/Autogenerated/GlslStd450Grammar.cs b/Spv.Generator/Autogenerated/GlslStd450Grammar.cs new file mode 100644 index 000000000..4722d2e49 --- /dev/null +++ b/Spv.Generator/Autogenerated/GlslStd450Grammar.cs @@ -0,0 +1,441 @@ +// AUTOGENERATED: DO NOT EDIT +// Last update date: 2021-01-06 23:02:26.955269 +#region Grammar License +// Copyright (c) 2014-2016 The Khronos Group Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. +#endregion + +using static Spv.Specification; + +namespace Spv.Generator +{ + public partial class Module + { + public Instruction GlslRound(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 1, x); + } + + public Instruction GlslRoundEven(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 2, x); + } + + public Instruction GlslTrunc(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 3, x); + } + + public Instruction GlslFAbs(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 4, x); + } + + public Instruction GlslSAbs(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 5, x); + } + + public Instruction GlslFSign(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 6, x); + } + + public Instruction GlslSSign(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 7, x); + } + + public Instruction GlslFloor(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 8, x); + } + + public Instruction GlslCeil(Instruction resultType, 
Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 9, x); + } + + public Instruction GlslFract(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 10, x); + } + + public Instruction GlslRadians(Instruction resultType, Instruction degrees) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 11, degrees); + } + + public Instruction GlslDegrees(Instruction resultType, Instruction radians) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 12, radians); + } + + public Instruction GlslSin(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 13, x); + } + + public Instruction GlslCos(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 14, x); + } + + public Instruction GlslTan(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 15, x); + } + + public Instruction GlslAsin(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 16, x); + } + + public Instruction GlslAcos(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 17, x); + } + + public Instruction GlslAtan(Instruction resultType, Instruction y_over_x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 18, y_over_x); + } + + public Instruction GlslSinh(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 19, x); + } + + public Instruction GlslCosh(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 20, x); + } + + public Instruction GlslTanh(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 21, x); + } + + public Instruction GlslAsinh(Instruction resultType, 
Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 22, x); + } + + public Instruction GlslAcosh(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 23, x); + } + + public Instruction GlslAtanh(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 24, x); + } + + public Instruction GlslAtan2(Instruction resultType, Instruction y, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 25, y, x); + } + + public Instruction GlslPow(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 26, x, y); + } + + public Instruction GlslExp(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 27, x); + } + + public Instruction GlslLog(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 28, x); + } + + public Instruction GlslExp2(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 29, x); + } + + public Instruction GlslLog2(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 30, x); + } + + public Instruction GlslSqrt(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 31, x); + } + + public Instruction GlslInverseSqrt(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 32, x); + } + + public Instruction GlslDeterminant(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 33, x); + } + + public Instruction GlslMatrixInverse(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 34, x); + } + + public Instruction GlslModf(Instruction 
resultType, Instruction x, Instruction i) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 35, x, i); + } + + public Instruction GlslModfStruct(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 36, x); + } + + public Instruction GlslFMin(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 37, x, y); + } + + public Instruction GlslUMin(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 38, x, y); + } + + public Instruction GlslSMin(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 39, x, y); + } + + public Instruction GlslFMax(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 40, x, y); + } + + public Instruction GlslUMax(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 41, x, y); + } + + public Instruction GlslSMax(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 42, x, y); + } + + public Instruction GlslFClamp(Instruction resultType, Instruction x, Instruction minVal, Instruction maxVal) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 43, x, minVal, maxVal); + } + + public Instruction GlslUClamp(Instruction resultType, Instruction x, Instruction minVal, Instruction maxVal) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 44, x, minVal, maxVal); + } + + public Instruction GlslSClamp(Instruction resultType, Instruction x, Instruction minVal, Instruction maxVal) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 45, x, minVal, maxVal); + } + + public Instruction GlslFMix(Instruction resultType, Instruction x, Instruction y, 
Instruction a) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 46, x, y, a); + } + + public Instruction GlslIMix(Instruction resultType, Instruction x, Instruction y, Instruction a) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 47, x, y, a); + } + + public Instruction GlslStep(Instruction resultType, Instruction edge, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 48, edge, x); + } + + public Instruction GlslSmoothStep(Instruction resultType, Instruction edge0, Instruction edge1, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 49, edge0, edge1, x); + } + + public Instruction GlslFma(Instruction resultType, Instruction a, Instruction b, Instruction c) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 50, a, b, c); + } + + public Instruction GlslFrexp(Instruction resultType, Instruction x, Instruction exp) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 51, x, exp); + } + + public Instruction GlslFrexpStruct(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 52, x); + } + + public Instruction GlslLdexp(Instruction resultType, Instruction x, Instruction exp) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 53, x, exp); + } + + public Instruction GlslPackSnorm4x8(Instruction resultType, Instruction v) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 54, v); + } + + public Instruction GlslPackUnorm4x8(Instruction resultType, Instruction v) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 55, v); + } + + public Instruction GlslPackSnorm2x16(Instruction resultType, Instruction v) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 56, v); + } + + public Instruction GlslPackUnorm2x16(Instruction resultType, Instruction v) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 57, v); + } + + 
public Instruction GlslPackHalf2x16(Instruction resultType, Instruction v) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 58, v); + } + + public Instruction GlslPackDouble2x32(Instruction resultType, Instruction v) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 59, v); + } + + public Instruction GlslUnpackSnorm2x16(Instruction resultType, Instruction p) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 60, p); + } + + public Instruction GlslUnpackUnorm2x16(Instruction resultType, Instruction p) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 61, p); + } + + public Instruction GlslUnpackHalf2x16(Instruction resultType, Instruction v) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 62, v); + } + + public Instruction GlslUnpackSnorm4x8(Instruction resultType, Instruction p) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 63, p); + } + + public Instruction GlslUnpackUnorm4x8(Instruction resultType, Instruction p) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 64, p); + } + + public Instruction GlslUnpackDouble2x32(Instruction resultType, Instruction v) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 65, v); + } + + public Instruction GlslLength(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 66, x); + } + + public Instruction GlslDistance(Instruction resultType, Instruction p0, Instruction p1) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 67, p0, p1); + } + + public Instruction GlslCross(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 68, x, y); + } + + public Instruction GlslNormalize(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 69, x); + } + + public Instruction GlslFaceForward(Instruction resultType, 
Instruction n, Instruction i, Instruction nref) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 70, n, i, nref); + } + + public Instruction GlslReflect(Instruction resultType, Instruction i, Instruction n) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 71, i, n); + } + + public Instruction GlslRefract(Instruction resultType, Instruction i, Instruction n, Instruction eta) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 72, i, n, eta); + } + + public Instruction GlslFindILsb(Instruction resultType, Instruction value) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 73, value); + } + + public Instruction GlslFindSMsb(Instruction resultType, Instruction value) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 74, value); + } + + public Instruction GlslFindUMsb(Instruction resultType, Instruction value) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 75, value); + } + + public Instruction GlslInterpolateAtCentroid(Instruction resultType, Instruction interpolant) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 76, interpolant); + } + + public Instruction GlslInterpolateAtSample(Instruction resultType, Instruction interpolant, Instruction sample) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 77, interpolant, sample); + } + + public Instruction GlslInterpolateAtOffset(Instruction resultType, Instruction interpolant, Instruction offset) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 78, interpolant, offset); + } + + public Instruction GlslNMin(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 79, x, y); + } + + public Instruction GlslNMax(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 80, x, y); + } + + public Instruction GlslNClamp(Instruction resultType, 
Instruction x, Instruction minVal, Instruction maxVal) + { + return ExtInst(resultType, AddExtInstImport("GLSL.std.450"), 81, x, minVal, maxVal); + } + + } +} diff --git a/Spv.Generator/Autogenerated/OpenClGrammar.cs b/Spv.Generator/Autogenerated/OpenClGrammar.cs new file mode 100644 index 000000000..ac990fbc8 --- /dev/null +++ b/Spv.Generator/Autogenerated/OpenClGrammar.cs @@ -0,0 +1,841 @@ +// AUTOGENERATED: DO NOT EDIT +// Last update date: 2021-01-06 23:02:27.020534 +#region Grammar License +// Copyright (c) 2014-2016 The Khronos Group Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. 
+#endregion + +using static Spv.Specification; + +namespace Spv.Generator +{ + public partial class Module + { + public Instruction OpenClAcos(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 0, x); + } + + public Instruction OpenClAcosh(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 1, x); + } + + public Instruction OpenClAcospi(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 2, x); + } + + public Instruction OpenClAsin(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 3, x); + } + + public Instruction OpenClAsinh(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 4, x); + } + + public Instruction OpenClAsinpi(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 5, x); + } + + public Instruction OpenClAtan(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 6, x); + } + + public Instruction OpenClAtan2(Instruction resultType, Instruction y, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 7, y, x); + } + + public Instruction OpenClAtanh(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 8, x); + } + + public Instruction OpenClAtanpi(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 9, x); + } + + public Instruction OpenClAtan2pi(Instruction resultType, Instruction y, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 10, y, x); + } + + public Instruction OpenClCbrt(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 11, x); + } + + public Instruction OpenClCeil(Instruction 
resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 12, x); + } + + public Instruction OpenClCopysign(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 13, x, y); + } + + public Instruction OpenClCos(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 14, x); + } + + public Instruction OpenClCosh(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 15, x); + } + + public Instruction OpenClCospi(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 16, x); + } + + public Instruction OpenClErfc(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 17, x); + } + + public Instruction OpenClErf(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 18, x); + } + + public Instruction OpenClExp(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 19, x); + } + + public Instruction OpenClExp2(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 20, x); + } + + public Instruction OpenClExp10(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 21, x); + } + + public Instruction OpenClExpm1(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 22, x); + } + + public Instruction OpenClFabs(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 23, x); + } + + public Instruction OpenClFdim(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 24, x, y); + } + + public Instruction OpenClFloor(Instruction 
resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 25, x); + } + + public Instruction OpenClFma(Instruction resultType, Instruction a, Instruction b, Instruction c) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 26, a, b, c); + } + + public Instruction OpenClFmax(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 27, x, y); + } + + public Instruction OpenClFmin(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 28, x, y); + } + + public Instruction OpenClFmod(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 29, x, y); + } + + public Instruction OpenClFract(Instruction resultType, Instruction x, Instruction ptr) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 30, x, ptr); + } + + public Instruction OpenClFrexp(Instruction resultType, Instruction x, Instruction exp) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 31, x, exp); + } + + public Instruction OpenClHypot(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 32, x, y); + } + + public Instruction OpenClIlogb(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 33, x); + } + + public Instruction OpenClLdexp(Instruction resultType, Instruction x, Instruction k) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 34, x, k); + } + + public Instruction OpenClLgamma(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 35, x); + } + + public Instruction OpenClLgamma_r(Instruction resultType, Instruction x, Instruction signp) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 36, x, signp); + } + + public Instruction 
OpenClLog(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 37, x); + } + + public Instruction OpenClLog2(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 38, x); + } + + public Instruction OpenClLog10(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 39, x); + } + + public Instruction OpenClLog1p(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 40, x); + } + + public Instruction OpenClLogb(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 41, x); + } + + public Instruction OpenClMad(Instruction resultType, Instruction a, Instruction b, Instruction c) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 42, a, b, c); + } + + public Instruction OpenClMaxmag(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 43, x, y); + } + + public Instruction OpenClMinmag(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 44, x, y); + } + + public Instruction OpenClModf(Instruction resultType, Instruction x, Instruction iptr) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 45, x, iptr); + } + + public Instruction OpenClNan(Instruction resultType, Instruction nancode) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 46, nancode); + } + + public Instruction OpenClNextafter(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 47, x, y); + } + + public Instruction OpenClPow(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 48, x, y); + } + + public Instruction OpenClPown(Instruction resultType, Instruction 
x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 49, x, y); + } + + public Instruction OpenClPowr(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 50, x, y); + } + + public Instruction OpenClRemainder(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 51, x, y); + } + + public Instruction OpenClRemquo(Instruction resultType, Instruction x, Instruction y, Instruction quo) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 52, x, y, quo); + } + + public Instruction OpenClRint(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 53, x); + } + + public Instruction OpenClRootn(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 54, x, y); + } + + public Instruction OpenClRound(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 55, x); + } + + public Instruction OpenClRsqrt(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 56, x); + } + + public Instruction OpenClSin(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 57, x); + } + + public Instruction OpenClSincos(Instruction resultType, Instruction x, Instruction cosval) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 58, x, cosval); + } + + public Instruction OpenClSinh(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 59, x); + } + + public Instruction OpenClSinpi(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 60, x); + } + + public Instruction OpenClSqrt(Instruction resultType, Instruction x) + { + return ExtInst(resultType, 
AddExtInstImport("OpenCL.std"), 61, x); + } + + public Instruction OpenClTan(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 62, x); + } + + public Instruction OpenClTanh(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 63, x); + } + + public Instruction OpenClTanpi(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 64, x); + } + + public Instruction OpenClTgamma(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 65, x); + } + + public Instruction OpenClTrunc(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 66, x); + } + + public Instruction OpenClHalf_cos(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 67, x); + } + + public Instruction OpenClHalf_divide(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 68, x, y); + } + + public Instruction OpenClHalf_exp(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 69, x); + } + + public Instruction OpenClHalf_exp2(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 70, x); + } + + public Instruction OpenClHalf_exp10(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 71, x); + } + + public Instruction OpenClHalf_log(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 72, x); + } + + public Instruction OpenClHalf_log2(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 73, x); + } + + public Instruction OpenClHalf_log10(Instruction resultType, Instruction x) + { + return 
ExtInst(resultType, AddExtInstImport("OpenCL.std"), 74, x); + } + + public Instruction OpenClHalf_powr(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 75, x, y); + } + + public Instruction OpenClHalf_recip(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 76, x); + } + + public Instruction OpenClHalf_rsqrt(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 77, x); + } + + public Instruction OpenClHalf_sin(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 78, x); + } + + public Instruction OpenClHalf_sqrt(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 79, x); + } + + public Instruction OpenClHalf_tan(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 80, x); + } + + public Instruction OpenClNative_cos(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 81, x); + } + + public Instruction OpenClNative_divide(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 82, x, y); + } + + public Instruction OpenClNative_exp(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 83, x); + } + + public Instruction OpenClNative_exp2(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 84, x); + } + + public Instruction OpenClNative_exp10(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 85, x); + } + + public Instruction OpenClNative_log(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 86, x); + } + + public Instruction 
OpenClNative_log2(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 87, x); + } + + public Instruction OpenClNative_log10(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 88, x); + } + + public Instruction OpenClNative_powr(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 89, x, y); + } + + public Instruction OpenClNative_recip(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 90, x); + } + + public Instruction OpenClNative_rsqrt(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 91, x); + } + + public Instruction OpenClNative_sin(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 92, x); + } + + public Instruction OpenClNative_sqrt(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 93, x); + } + + public Instruction OpenClNative_tan(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 94, x); + } + + public Instruction OpenClS_abs(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 141, x); + } + + public Instruction OpenClS_abs_diff(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 142, x, y); + } + + public Instruction OpenClS_add_sat(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 143, x, y); + } + + public Instruction OpenClU_add_sat(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 144, x, y); + } + + public Instruction OpenClS_hadd(Instruction resultType, Instruction 
x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 145, x, y); + } + + public Instruction OpenClU_hadd(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 146, x, y); + } + + public Instruction OpenClS_rhadd(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 147, x, y); + } + + public Instruction OpenClU_rhadd(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 148, x, y); + } + + public Instruction OpenClS_clamp(Instruction resultType, Instruction x, Instruction minval, Instruction maxval) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 149, x, minval, maxval); + } + + public Instruction OpenClU_clamp(Instruction resultType, Instruction x, Instruction minval, Instruction maxval) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 150, x, minval, maxval); + } + + public Instruction OpenClClz(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 151, x); + } + + public Instruction OpenClCtz(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 152, x); + } + + public Instruction OpenClS_mad_hi(Instruction resultType, Instruction a, Instruction b, Instruction c) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 153, a, b, c); + } + + public Instruction OpenClU_mad_sat(Instruction resultType, Instruction x, Instruction y, Instruction z) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 154, x, y, z); + } + + public Instruction OpenClS_mad_sat(Instruction resultType, Instruction x, Instruction y, Instruction z) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 155, x, y, z); + } + + public Instruction OpenClS_max(Instruction resultType, Instruction x, Instruction y) + 
{ + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 156, x, y); + } + + public Instruction OpenClU_max(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 157, x, y); + } + + public Instruction OpenClS_min(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 158, x, y); + } + + public Instruction OpenClU_min(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 159, x, y); + } + + public Instruction OpenClS_mul_hi(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 160, x, y); + } + + public Instruction OpenClRotate(Instruction resultType, Instruction v, Instruction i) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 161, v, i); + } + + public Instruction OpenClS_sub_sat(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 162, x, y); + } + + public Instruction OpenClU_sub_sat(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 163, x, y); + } + + public Instruction OpenClU_upsample(Instruction resultType, Instruction hi, Instruction lo) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 164, hi, lo); + } + + public Instruction OpenClS_upsample(Instruction resultType, Instruction hi, Instruction lo) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 165, hi, lo); + } + + public Instruction OpenClPopcount(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 166, x); + } + + public Instruction OpenClS_mad24(Instruction resultType, Instruction x, Instruction y, Instruction z) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 167, x, y, z); + } + + public 
Instruction OpenClU_mad24(Instruction resultType, Instruction x, Instruction y, Instruction z) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 168, x, y, z); + } + + public Instruction OpenClS_mul24(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 169, x, y); + } + + public Instruction OpenClU_mul24(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 170, x, y); + } + + public Instruction OpenClU_abs(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 201, x); + } + + public Instruction OpenClU_abs_diff(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 202, x, y); + } + + public Instruction OpenClU_mul_hi(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 203, x, y); + } + + public Instruction OpenClU_mad_hi(Instruction resultType, Instruction a, Instruction b, Instruction c) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 204, a, b, c); + } + + public Instruction OpenClFclamp(Instruction resultType, Instruction x, Instruction minval, Instruction maxval) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 95, x, minval, maxval); + } + + public Instruction OpenClDegrees(Instruction resultType, Instruction radians) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 96, radians); + } + + public Instruction OpenClFmax_common(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 97, x, y); + } + + public Instruction OpenClFmin_common(Instruction resultType, Instruction x, Instruction y) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 98, x, y); + } + + public Instruction OpenClMix(Instruction resultType, 
Instruction x, Instruction y, Instruction a) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 99, x, y, a); + } + + public Instruction OpenClRadians(Instruction resultType, Instruction degrees) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 100, degrees); + } + + public Instruction OpenClStep(Instruction resultType, Instruction edge, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 101, edge, x); + } + + public Instruction OpenClSmoothstep(Instruction resultType, Instruction edge0, Instruction edge1, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 102, edge0, edge1, x); + } + + public Instruction OpenClSign(Instruction resultType, Instruction x) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 103, x); + } + + public Instruction OpenClCross(Instruction resultType, Instruction p0, Instruction p1) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 104, p0, p1); + } + + public Instruction OpenClDistance(Instruction resultType, Instruction p0, Instruction p1) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 105, p0, p1); + } + + public Instruction OpenClLength(Instruction resultType, Instruction p) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 106, p); + } + + public Instruction OpenClNormalize(Instruction resultType, Instruction p) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 107, p); + } + + public Instruction OpenClFast_distance(Instruction resultType, Instruction p0, Instruction p1) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 108, p0, p1); + } + + public Instruction OpenClFast_length(Instruction resultType, Instruction p) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 109, p); + } + + public Instruction OpenClFast_normalize(Instruction resultType, Instruction p) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 110, p); + } + + 
public Instruction OpenClBitselect(Instruction resultType, Instruction a, Instruction b, Instruction c) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 186, a, b, c); + } + + public Instruction OpenClSelect(Instruction resultType, Instruction a, Instruction b, Instruction c) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 187, a, b, c); + } + + public Instruction OpenClVloadn(Instruction resultType, Instruction offset, Instruction p, LiteralInteger n) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 171, offset, p, n); + } + + public Instruction OpenClVstoren(Instruction resultType, Instruction data, Instruction offset, Instruction p) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 172, data, offset, p); + } + + public Instruction OpenClVload_half(Instruction resultType, Instruction offset, Instruction p) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 173, offset, p); + } + + public Instruction OpenClVload_halfn(Instruction resultType, Instruction offset, Instruction p, LiteralInteger n) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 174, offset, p, n); + } + + public Instruction OpenClVstore_half(Instruction resultType, Instruction data, Instruction offset, Instruction p) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 175, data, offset, p); + } + + public Instruction OpenClVstore_half_r(Instruction resultType, Instruction data, Instruction offset, Instruction p, FPRoundingMode mode) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 176, data, offset, p, LiteralInteger.CreateForEnum(mode)); + } + + public Instruction OpenClVstore_halfn(Instruction resultType, Instruction data, Instruction offset, Instruction p) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 177, data, offset, p); + } + + public Instruction OpenClVstore_halfn_r(Instruction resultType, Instruction data, Instruction offset, Instruction p, 
FPRoundingMode mode) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 178, data, offset, p, LiteralInteger.CreateForEnum(mode)); + } + + public Instruction OpenClVloada_halfn(Instruction resultType, Instruction offset, Instruction p, LiteralInteger n) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 179, offset, p, n); + } + + public Instruction OpenClVstorea_halfn(Instruction resultType, Instruction data, Instruction offset, Instruction p) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 180, data, offset, p); + } + + public Instruction OpenClVstorea_halfn_r(Instruction resultType, Instruction data, Instruction offset, Instruction p, FPRoundingMode mode) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 181, data, offset, p, LiteralInteger.CreateForEnum(mode)); + } + + public Instruction OpenClShuffle(Instruction resultType, Instruction x, Instruction shufflemask) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 182, x, shufflemask); + } + + public Instruction OpenClShuffle2(Instruction resultType, Instruction x, Instruction y, Instruction shufflemask) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 183, x, y, shufflemask); + } + + public Instruction OpenClPrefetch(Instruction resultType, Instruction ptr, Instruction numelements) + { + return ExtInst(resultType, AddExtInstImport("OpenCL.std"), 185, ptr, numelements); + } + + } +} diff --git a/Spv.Generator/ConstantKey.cs b/Spv.Generator/ConstantKey.cs new file mode 100644 index 000000000..d3c1b905a --- /dev/null +++ b/Spv.Generator/ConstantKey.cs @@ -0,0 +1,30 @@ +using System; +using System.Diagnostics.CodeAnalysis; + +namespace Spv.Generator +{ + internal struct ConstantKey : IEquatable + { + private Instruction _constant; + + public ConstantKey(Instruction constant) + { + _constant = constant; + } + + public override int GetHashCode() + { + return HashCode.Combine(_constant.Opcode, _constant.GetHashCodeContent(), 
_constant.GetHashCodeResultType()); + } + + public bool Equals(ConstantKey other) + { + return _constant.Opcode == other._constant.Opcode && _constant.EqualsContent(other._constant) && _constant.EqualsResultType(other._constant); + } + + public override bool Equals([NotNullWhen(true)] object obj) + { + return obj is ConstantKey && Equals((ConstantKey)obj); + } + } +} diff --git a/Spv.Generator/DeterministicHashCode.cs b/Spv.Generator/DeterministicHashCode.cs new file mode 100644 index 000000000..caba7ad32 --- /dev/null +++ b/Spv.Generator/DeterministicHashCode.cs @@ -0,0 +1,110 @@ +using System; +using System.Collections.Generic; +using System.Numerics; +using System.Runtime.CompilerServices; + +namespace Spv.Generator +{ + /// + /// Similar to System.HashCode, but without introducing random values. + /// The same primes and shifts are used. + /// + internal static class DeterministicHashCode + { + private const uint Prime1 = 2654435761U; + private const uint Prime2 = 2246822519U; + private const uint Prime3 = 3266489917U; + private const uint Prime4 = 668265263U; + + public static int GetHashCode(string value) + { + uint hash = (uint)value.Length + Prime1; + + for (int i = 0; i < value.Length; i++) + { + hash += (hash << 7) ^ value[i]; + } + + return (int)MixFinal(hash); + } + + public static int Combine(ReadOnlySpan values) + { + uint hashCode = Prime2; + hashCode += 4 * (uint)values.Length; + + foreach (T value in values) + { + uint hc = (uint)(value?.GetHashCode() ?? 0); + hashCode = MixStep(hashCode, hc); + } + + return (int)MixFinal(hashCode); + } + + public static int Combine(T1 value1, T2 value2) + { + uint hc1 = (uint)(value1?.GetHashCode() ?? 0); + uint hc2 = (uint)(value2?.GetHashCode() ?? 0); + + uint hash = Prime2; + hash += 8; + + hash = MixStep(hash, hc1); + hash = MixStep(hash, hc2); + + return (int)MixFinal(hash); + } + + public static int Combine(T1 value1, T2 value2, T3 value3) + { + uint hc1 = (uint)(value1?.GetHashCode() ?? 
0); + uint hc2 = (uint)(value2?.GetHashCode() ?? 0); + uint hc3 = (uint)(value3?.GetHashCode() ?? 0); + + uint hash = Prime2; + hash += 12; + + hash = MixStep(hash, hc1); + hash = MixStep(hash, hc2); + hash = MixStep(hash, hc3); + + return (int)MixFinal(hash); + } + + public static int Combine(T1 value1, T2 value2, T3 value3, T4 value4) + { + uint hc1 = (uint)(value1?.GetHashCode() ?? 0); + uint hc2 = (uint)(value2?.GetHashCode() ?? 0); + uint hc3 = (uint)(value3?.GetHashCode() ?? 0); + uint hc4 = (uint)(value4?.GetHashCode() ?? 0); + + uint hash = Prime2; + hash += 16; + + hash = MixStep(hash, hc1); + hash = MixStep(hash, hc2); + hash = MixStep(hash, hc3); + hash = MixStep(hash, hc4); + + return (int)MixFinal(hash); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint MixStep(uint hashCode, uint mixValue) + { + return BitOperations.RotateLeft(hashCode + mixValue * Prime3, 17) * Prime4; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint MixFinal(uint hash) + { + hash ^= hash >> 15; + hash *= Prime2; + hash ^= hash >> 13; + hash *= Prime3; + hash ^= hash >> 16; + return hash; + } + } +} diff --git a/Spv.Generator/DeterministicStringKey.cs b/Spv.Generator/DeterministicStringKey.cs new file mode 100644 index 000000000..491bb745a --- /dev/null +++ b/Spv.Generator/DeterministicStringKey.cs @@ -0,0 +1,30 @@ +using System; +using System.Diagnostics.CodeAnalysis; + +namespace Spv.Generator +{ + internal class DeterministicStringKey : IEquatable + { + private string _value; + + public DeterministicStringKey(string value) + { + _value = value; + } + + public override int GetHashCode() + { + return DeterministicHashCode.GetHashCode(_value); + } + + public bool Equals(DeterministicStringKey other) + { + return _value == other._value; + } + + public override bool Equals([NotNullWhen(true)] object obj) + { + return obj is DeterministicStringKey && Equals((DeterministicStringKey)obj); + } + } +} diff --git 
a/Spv.Generator/GeneratorPool.cs b/Spv.Generator/GeneratorPool.cs new file mode 100644 index 000000000..f6c92918e --- /dev/null +++ b/Spv.Generator/GeneratorPool.cs @@ -0,0 +1,58 @@ +using System.Collections.Generic; + +namespace Spv.Generator +{ + public class GeneratorPool where T : class, new() /* Chunked object pool: instances are pre-created in arrays of _poolSizeIncrement items and handed out again after Clear(). */ + { + private List _pool; + private int _chunkIndex = -1; + private int _poolIndex = -1; + private int _initialSize; + private int _poolSizeIncrement; + + public GeneratorPool(): this(1000, 200) { } + + public GeneratorPool(int chunkSizeLimit, int poolSizeIncrement) /* chunkSizeLimit only sizes the chunk list (capacity chunkSizeLimit * 2); poolSizeIncrement is the number of instances per chunk. */ + { + _initialSize = chunkSizeLimit; + _poolSizeIncrement = poolSizeIncrement; + + _pool = new(chunkSizeLimit * 2); + + AddChunkIfNeeded(); + } + + public T Allocate() /* Returns the next pooled instance, moving to (or creating) the next chunk when the current one is used up. The instance is returned as-is; callers re-initialise it. */ + { + if (++_poolIndex >= _poolSizeIncrement) + { + AddChunkIfNeeded(); + + _poolIndex = 0; + } + + return _pool[_chunkIndex][_poolIndex]; + } + + private void AddChunkIfNeeded() /* Advances to the next chunk and allocates it only if it does not exist yet. */ + { + if (++_chunkIndex >= _pool.Count) + { + T[] pool = new T[_poolSizeIncrement]; + + for (int i = 0; i < _poolSizeIncrement; i++) + { + pool[i] = new T(); + } + + _pool.Add(pool); + } + } + + public void Clear() /* Rewinds to the first slot of the first chunk; existing instances are kept and handed out again. */ + { + _chunkIndex = 0; + _poolIndex = -1; + } + } +} diff --git a/Spv.Generator/Instruction.cs b/Spv.Generator/Instruction.cs new file mode 100644 index 000000000..922994049 --- /dev/null +++ b/Spv.Generator/Instruction.cs @@ -0,0 +1,232 @@ +using System; +using System.Diagnostics; +using System.IO; + +namespace Spv.Generator +{ + public sealed class Instruction : Operand, IEquatable + { + public const uint InvalidId = uint.MaxValue; + + public Specification.Op Opcode { get; private set; } + private Instruction _resultType; + private InstructionOperands _operands; + + public uint Id { get; set; } + + public Instruction() { } + + public void Set(Specification.Op opcode, uint id = InvalidId, Instruction resultType = null) /* (Re)initialises this instance; operands added earlier are discarded. */ + { + Opcode = opcode; + Id = id; + _resultType = resultType; + + _operands = new InstructionOperands(); + } + + public void SetId(uint id) +
{ + Id = id; + } + + public OperandType Type => OperandType.Instruction; + + public ushort GetTotalWordCount() /* One word for opcode/word count, one more if an id is bound, plus the word counts of the result type and of every operand. */ + { + ushort result = WordCount; + + if (Id != InvalidId) + { + result++; + } + + if (_resultType != null) + { + result += _resultType.WordCount; + } + + Span operands = _operands.ToSpan(); + for (int i = 0; i < operands.Length; i++) + { + result += operands[i].WordCount; + } + + return result; + } + + public ushort WordCount => 1; + + public void AddOperand(Operand value) + { + Debug.Assert(value != null); + _operands.Add(value); + } + + public void AddOperand(Operand[] value) + { + foreach (Operand instruction in value) + { + AddOperand(instruction); + } + } + + public void AddOperand(LiteralInteger[] value) + { + foreach (LiteralInteger instruction in value) + { + AddOperand(instruction); + } + } + + public void AddOperand(LiteralInteger value) + { + AddOperand((Operand)value); + } + + public void AddOperand(Instruction[] value) + { + foreach (Instruction instruction in value) + { + AddOperand(instruction); + } + } + + public void AddOperand(Instruction value) + { + AddOperand((Operand)value); + } + + public void AddOperand(string value) + { + AddOperand(new LiteralString(value)); + } + + public void AddOperand(T value) where T: Enum + { + AddOperand(LiteralInteger.CreateForEnum(value)); + } + + public void Write(BinaryWriter writer) /* Emits the whole instruction: opcode and total word count, the result type (if any), this instruction's id (if bound), then every operand. */ + { + // Word 0 + writer.Write((ushort)Opcode); + writer.Write(GetTotalWordCount()); + + _resultType?.WriteOperand(writer); + + if (Id != InvalidId) + { + writer.Write(Id); + } + + Span operands = _operands.ToSpan(); + for (int i = 0; i < operands.Length; i++) + { + operands[i].WriteOperand(writer); + } + } + + public void WriteOperand(BinaryWriter writer) /* Emits only this instruction's id (the form used when it is referenced by another instruction); throws InvalidOperationException when no id has been bound. */ + { + Debug.Assert(Id != InvalidId); + + if (Id == InvalidId) + { + string methodToCall; + + if (Opcode == Specification.Op.OpVariable) + { + methodToCall = "AddLocalVariable or AddGlobalVariable"; + } + else if (Opcode == Specification.Op.OpLabel) + { + methodToCall =
"AddLabel"; + } + else + { + throw new InvalidOperationException("Internal error"); + } + + throw new InvalidOperationException($"Id wasn't bound to the module, please make sure to call {methodToCall}"); + } + + writer.Write(Id); + } + + public override bool Equals(object obj) + { + return obj is Instruction instruction && Equals(instruction); + } + + public bool Equals(Instruction cmpObj) /* Equal when opcode, id, result type and operands all match. The opcode is compared because GetHashCode() hashes it, so equal instructions always hash alike; a null argument compares unequal instead of throwing. */ + { + bool result = cmpObj != null && Type == cmpObj.Type && Opcode == cmpObj.Opcode && Id == cmpObj.Id; + + if (result) + { + if (_resultType != null && cmpObj._resultType != null) + { + result &= _resultType.Equals(cmpObj._resultType); + } + else if (_resultType != null || cmpObj._resultType != null) + { + return false; + } + } + + if (result) + { + result &= EqualsContent(cmpObj); + } + + return result; + } + + public bool EqualsContent(Instruction cmpObj) /* Compares the operand lists only: same count, then pairwise Equals. */ + { + Span thisOperands = _operands.ToSpan(); + Span cmpOperands = cmpObj._operands.ToSpan(); + + if (thisOperands.Length != cmpOperands.Length) + { + return false; + } + + for (int i = 0; i < thisOperands.Length; i++) + { + if (!thisOperands[i].Equals(cmpOperands[i])) + { + return false; + } + } + + return true; + } + + public bool EqualsResultType(Instruction cmpObj) /* Compares the two result types by opcode and operands; both instructions must have a result type, it is dereferenced unconditionally. */ + { + return _resultType.Opcode == cmpObj._resultType.Opcode && _resultType.EqualsContent(cmpObj._resultType); + } + + public int GetHashCodeContent() + { + return DeterministicHashCode.Combine(_operands.ToSpan()); + } + + public int GetHashCodeResultType() + { + return DeterministicHashCode.Combine(_resultType.Opcode, _resultType.GetHashCodeContent()); + } + + public override int GetHashCode() + { + return DeterministicHashCode.Combine(Opcode, Id, _resultType, DeterministicHashCode.Combine(_operands.ToSpan())); + } + + public bool Equals(Operand obj) + { + return obj is Instruction instruction && Equals(instruction); + } + } +} diff --git a/Spv.Generator/InstructionOperands.cs b/Spv.Generator/InstructionOperands.cs new file mode 100644 index 000000000..3e53e60e7 --- /dev/null +++
b/Spv.Generator/InstructionOperands.cs @@ -0,0 +1,57 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices; +using System.Text; +using System.Threading.Tasks; + +namespace Spv.Generator +{ + public struct InstructionOperands + { + private const int InternalCount = 5; + + public int Count; + public Operand Operand1; + public Operand Operand2; + public Operand Operand3; + public Operand Operand4; + public Operand Operand5; + public Operand[] Overflow; + + public Span ToSpan() + { + if (Count > InternalCount) + { + return MemoryMarshal.CreateSpan(ref this.Overflow[0], Count); + } + else + { + return MemoryMarshal.CreateSpan(ref this.Operand1, Count); + } + } + + public void Add(Operand operand) + { + if (Count < InternalCount) + { + MemoryMarshal.CreateSpan(ref this.Operand1, Count + 1)[Count] = operand; + Count++; + } + else + { + if (Overflow == null) + { + Overflow = new Operand[InternalCount * 2]; + MemoryMarshal.CreateSpan(ref this.Operand1, InternalCount).CopyTo(Overflow.AsSpan()); + } + else if (Count == Overflow.Length) + { + Array.Resize(ref Overflow, Overflow.Length * 2); + } + + Overflow[Count++] = operand; + } + } + } +} diff --git a/Spv.Generator/LICENSE b/Spv.Generator/LICENSE new file mode 100644 index 000000000..31aa79387 --- /dev/null +++ b/Spv.Generator/LICENSE @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/Spv.Generator/LiteralInteger.cs b/Spv.Generator/LiteralInteger.cs new file mode 100644 index 000000000..3193ed6e5 --- /dev/null +++ b/Spv.Generator/LiteralInteger.cs @@ -0,0 +1,103 @@ +using System; +using System.IO; + +namespace Spv.Generator +{ + public class LiteralInteger : Operand, IEquatable + { + [ThreadStatic] + private static GeneratorPool _pool; + + internal static void RegisterPool(GeneratorPool pool) + { + _pool = pool; + } + + internal static void UnregisterPool() + { + _pool = null; + } + + public OperandType Type => OperandType.Number; + + private enum IntegerType + { + UInt32, + Int32, + UInt64, + Int64, + Float32, + Float64, + } + + private IntegerType _integerType; + private ulong _data; + + public ushort WordCount { get; private set; } + + public LiteralInteger() { } + + private static LiteralInteger New() + { + return _pool.Allocate(); + } + + private LiteralInteger Set(ulong data, IntegerType integerType, ushort wordCount) + { + _data = data; + _integerType = integerType; + + WordCount = wordCount; + + return this; + } + + public static implicit operator LiteralInteger(int value) => New().Set((ulong)value, IntegerType.Int32, 1); + public static implicit operator LiteralInteger(uint value) => New().Set(value, IntegerType.UInt32, 1); + public static implicit operator LiteralInteger(long value) => New().Set((ulong)value, IntegerType.Int64, 2); + public static implicit operator LiteralInteger(ulong value) => New().Set(value, IntegerType.UInt64, 2); + public static implicit 
operator LiteralInteger(float value) => New().Set(BitConverter.SingleToUInt32Bits(value), IntegerType.Float32, 1); + public static implicit operator LiteralInteger(double value) => New().Set(BitConverter.DoubleToUInt64Bits(value), IntegerType.Float64, 2); + public static implicit operator LiteralInteger(Enum value) => New().Set((ulong)(int)(object)value, IntegerType.Int32, 1); + + // NOTE: this is not in the standard, but it is syntactic sugar that is useful for some instructions (TypeInt, etc.). + public static implicit operator LiteralInteger(bool value) => New().Set(Convert.ToUInt64(value), IntegerType.Int32, 1); + + public static LiteralInteger CreateForEnum(T value) where T : Enum + { + return value; + } + + public void WriteOperand(BinaryWriter writer) // Writes one 32-bit word when WordCount is 1, otherwise the full 64-bit value. + { + if (WordCount == 1) + { + writer.Write((uint)_data); + } + else + { + writer.Write(_data); + } + } + + public override bool Equals(object obj) + { + return obj is LiteralInteger literalInteger && Equals(literalInteger); + } + + public bool Equals(LiteralInteger cmpObj) + { + return Type == cmpObj.Type && _integerType == cmpObj._integerType && _data == cmpObj._data; + } + + public override int GetHashCode() // Hashes Type and _data only; _integerType is compared by Equals but is not part of the hash. + { + return DeterministicHashCode.Combine(Type, _data); + } + + public bool Equals(Operand obj) + { + return obj is LiteralInteger literalInteger && Equals(literalInteger); + } + } +} diff --git a/Spv.Generator/LiteralString.cs b/Spv.Generator/LiteralString.cs new file mode 100644 index 000000000..1cb1b8383 --- /dev/null +++ b/Spv.Generator/LiteralString.cs @@ -0,0 +1,51 @@ +using System; +using System.IO; +using System.Text; + +namespace Spv.Generator +{ + public class LiteralString : Operand, IEquatable + { + public OperandType Type => OperandType.String; + + private string _value; + + public LiteralString(string value) + { + _value = value; + } + + public ushort WordCount => (ushort)(_value.Length / 4 + 1); + + public void WriteOperand(BinaryWriter writer) + { + writer.Write(_value.AsSpan()); + + int
paddingSize = 4 - (Encoding.ASCII.GetByteCount(_value) % 4); + + Span padding = stackalloc byte[paddingSize]; + + writer.Write(padding); + } + + public override bool Equals(object obj) + { + return obj is LiteralString literalString && Equals(literalString); + } + + public bool Equals(LiteralString cmpObj) + { + return Type == cmpObj.Type && _value.Equals(cmpObj._value); + } + + public override int GetHashCode() + { + return DeterministicHashCode.Combine(Type, DeterministicHashCode.GetHashCode(_value)); + } + + public bool Equals(Operand obj) + { + return obj is LiteralString literalString && Equals(literalString); + } + } +} diff --git a/Spv.Generator/Module.cs b/Spv.Generator/Module.cs new file mode 100644 index 000000000..1505d4df5 --- /dev/null +++ b/Spv.Generator/Module.cs @@ -0,0 +1,366 @@ +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using static Spv.Specification; + +namespace Spv.Generator +{ + public partial class Module + { + // TODO: register with the SPIR-V registry; until then 0 is written as the generator id. + private const int GeneratorId = 0; + + private readonly uint _version; + + private uint _bound; // Next id to hand out: GetNewId() returns the current value and then increments it. + + // Follow spec order here while keeping it as dumb as possible. + private List _capabilities; + private List _extensions; + private Dictionary _extInstImports; + private AddressingModel _addressingModel; + private MemoryModel _memoryModel; + + private List _entrypoints; + private List _executionModes; + private List _debug; + private List _annotations; + + // In the declaration block; keyed by opcode and operands so AddTypeDeclaration can reuse an existing id. + private Dictionary _typeDeclarations; + // In the declaration block. + private List _globals; + // In the declaration block; keyed by opcode, operands and result type so AddConstant can reuse an existing id. + private Dictionary _constants; + // In the declaration block, for functions that aren't defined in the module.
+ private List _functionsDeclarations; + + private List _functionsDefinitions; + + private GeneratorPool _instPool; + private GeneratorPool _integerPool; + + public Module(uint version, GeneratorPool instPool = null, GeneratorPool integerPool = null) + { + _version = version; + _bound = 1; + _capabilities = new List(); + _extensions = new List(); + _extInstImports = new Dictionary(); + _addressingModel = AddressingModel.Logical; + _memoryModel = MemoryModel.Simple; + _entrypoints = new List(); + _executionModes = new List(); + _debug = new List(); + _annotations = new List(); + _typeDeclarations = new Dictionary(); + _constants = new Dictionary(); + _globals = new List(); + _functionsDeclarations = new List(); + _functionsDefinitions = new List(); + + _instPool = instPool ?? new GeneratorPool(); + _integerPool = integerPool ?? new GeneratorPool(); + + LiteralInteger.RegisterPool(_integerPool); + } + + private uint GetNewId() + { + return _bound++; + } + + public void AddCapability(Capability capability) + { + _capabilities.Add(capability); + } + + public void AddExtension(string extension) + { + _extensions.Add(extension); + } + + public Instruction NewInstruction(Op opcode, uint id = Instruction.InvalidId, Instruction resultType = null) + { + var result = _instPool.Allocate(); + result.Set(opcode, id, resultType); + + return result; + } + + public Instruction AddExtInstImport(string import) + { + var key = new DeterministicStringKey(import); + + if (_extInstImports.TryGetValue(key, out Instruction extInstImport)) + { + // update the duplicate instance to use the good id so it ends up being encoded right. 
+ return extInstImport; + } + + Instruction instruction = NewInstruction(Op.OpExtInstImport); + instruction.AddOperand(import); + + instruction.SetId(GetNewId()); + + _extInstImports.Add(key, instruction); + + return instruction; + } + + private void AddTypeDeclaration(Instruction instruction, bool forceIdAllocation) + { + var key = new TypeDeclarationKey(instruction); + + if (!forceIdAllocation) + { + if (_typeDeclarations.TryGetValue(key, out Instruction typeDeclaration)) + { + // update the duplicate instance to use the good id so it ends up being encoded right. + + instruction.SetId(typeDeclaration.Id); + + return; + } + } + + instruction.SetId(GetNewId()); + + _typeDeclarations.Add(key, instruction); + } + + public void AddEntryPoint(ExecutionModel executionModel, Instruction function, string name, params Instruction[] interfaces) + { + Debug.Assert(function.Opcode == Op.OpFunction); + + Instruction entryPoint = NewInstruction(Op.OpEntryPoint); + + entryPoint.AddOperand(executionModel); + entryPoint.AddOperand(function); + entryPoint.AddOperand(name); + entryPoint.AddOperand(interfaces); + + _entrypoints.Add(entryPoint); + } + + public void AddExecutionMode(Instruction function, ExecutionMode mode, params Operand[] parameters) + { + Debug.Assert(function.Opcode == Op.OpFunction); + + Instruction executionModeInstruction = NewInstruction(Op.OpExecutionMode); + + executionModeInstruction.AddOperand(function); + executionModeInstruction.AddOperand(mode); + executionModeInstruction.AddOperand(parameters); + + _executionModes.Add(executionModeInstruction); + } + + private void AddToFunctionDefinitions(Instruction instruction) + { + Debug.Assert(instruction.Opcode != Op.OpTypeInt); + _functionsDefinitions.Add(instruction); + } + + private void AddAnnotation(Instruction annotation) + { + _annotations.Add(annotation); + } + + private void AddDebug(Instruction debug) + { + _debug.Add(debug); + } + + public void AddLabel(Instruction label) + { + 
Debug.Assert(label.Opcode == Op.OpLabel); + + label.SetId(GetNewId()); + + AddToFunctionDefinitions(label); + } + + + public void AddLocalVariable(Instruction variable) + { + // TODO: ensure it has the local modifier + Debug.Assert(variable.Opcode == Op.OpVariable); + + variable.SetId(GetNewId()); + + AddToFunctionDefinitions(variable); + } + + public void AddGlobalVariable(Instruction variable) + { + // TODO: ensure it has the global modifier + // TODO: all constant opcodes (OpSpecXXX and the rest of the OpConstantXXX) + Debug.Assert(variable.Opcode == Op.OpVariable); + + variable.SetId(GetNewId()); + + _globals.Add(variable); + } + + private void AddConstant(Instruction constant) + { + Debug.Assert(constant.Opcode == Op.OpConstant || + constant.Opcode == Op.OpConstantFalse || + constant.Opcode == Op.OpConstantTrue || + constant.Opcode == Op.OpConstantNull || + constant.Opcode == Op.OpConstantComposite); + + var key = new ConstantKey(constant); + + if (_constants.TryGetValue(key, out Instruction global)) + { + // An equal constant is already registered: give this duplicate instance the registered id so it is encoded correctly. + constant.SetId(global.Id); + + return; + } + + constant.SetId(GetNewId()); + + _constants.Add(key, constant); + } + + public Instruction ExtInst(Instruction resultType, Instruction set, LiteralInteger instruction, params Operand[] parameters) // Builds an OpExtInst with a fresh id (operands: set, instruction number, parameters) and appends it to the function definitions. + { + Instruction result = NewInstruction(Op.OpExtInst, GetNewId(), resultType); + + result.AddOperand(set); + result.AddOperand(instruction); + result.AddOperand(parameters); + AddToFunctionDefinitions(result); + + return result; + } + + public void SetMemoryModel(AddressingModel addressingModel, MemoryModel memoryModel) + { + _addressingModel = addressingModel; + _memoryModel = memoryModel; + } + + // TODO: Find a way to make the auto-generated one be used instead.
public Instruction OpenClPrintf(Instruction resultType, Instruction format, params Instruction[] additionalarguments) + { + Instruction result = NewInstruction(Op.OpExtInst, GetNewId(), resultType); + + result.AddOperand(AddExtInstImport("OpenCL.std")); + result.AddOperand((LiteralInteger)184); + result.AddOperand(format); + result.AddOperand(additionalarguments); + AddToFunctionDefinitions(result); + + return result; + } + + public byte[] Generate() + { + // Estimate the size needed for the generated code, to avoid expanding the MemoryStream. + int sizeEstimate = 1024 + _functionsDefinitions.Count * 32; + + using (MemoryStream stream = new MemoryStream(sizeEstimate)) + { + BinaryWriter writer = new BinaryWriter(stream, System.Text.Encoding.ASCII); + + // Header: magic number, version, generator id, id bound, then a zero word. + writer.Write(MagicNumber); + writer.Write(_version); + writer.Write(GeneratorId); + writer.Write(_bound); + writer.Write(0u); + + // 1. Capabilities. + foreach (Capability capability in _capabilities) + { + Instruction capabilityInstruction = NewInstruction(Op.OpCapability); + + capabilityInstruction.AddOperand(capability); + capabilityInstruction.Write(writer); + } + + // 2. Extensions. + foreach (string extension in _extensions) + { + Instruction extensionInstruction = NewInstruction(Op.OpExtension); + + extensionInstruction.AddOperand(extension); + extensionInstruction.Write(writer); + } + + // 3. Extended instruction set imports. + foreach (Instruction extInstImport in _extInstImports.Values) + { + extInstImport.Write(writer); + } + + // 4. Memory model. + Instruction memoryModelInstruction = NewInstruction(Op.OpMemoryModel); + memoryModelInstruction.AddOperand(_addressingModel); + memoryModelInstruction.AddOperand(_memoryModel); + memoryModelInstruction.Write(writer); + + // 5. Entry points. + foreach (Instruction entrypoint in _entrypoints) + { + entrypoint.Write(writer); + } + + // 6. Execution modes. + foreach (Instruction executionMode in _executionModes) + { + executionMode.Write(writer); + } + + // 7. Debug instructions. + // TODO: order debug information correctly.
+ foreach (Instruction debug in _debug) + { + debug.Write(writer); + } + + // 8. Annotations. + foreach (Instruction annotation in _annotations) + { + annotation.Write(writer); + } + + // Ensure that everything is in the right order in the declarations section: types, globals and constants are merged and sorted by id. + List declarations = new List(); + declarations.AddRange(_typeDeclarations.Values); + declarations.AddRange(_globals); + declarations.AddRange(_constants.Values); + declarations.Sort((Instruction x, Instruction y) => x.Id.CompareTo(y.Id)); + + // 9. Declarations (types, globals, constants). + foreach (Instruction declaration in declarations) + { + declaration.Write(writer); + } + + // 10. Function declarations. + foreach (Instruction functionDeclaration in _functionsDeclarations) + { + functionDeclaration.Write(writer); + } + + // 11. Function definitions. + foreach (Instruction functionDefinition in _functionsDefinitions) + { + functionDefinition.Write(writer); + } + + _instPool.Clear(); // Rewind both pools; their instances will be handed out again by later allocations. + _integerPool.Clear(); + + LiteralInteger.UnregisterPool(); + + return stream.ToArray(); + } + } + } +} diff --git a/Spv.Generator/Operand.cs b/Spv.Generator/Operand.cs new file mode 100644 index 000000000..eaa2e13e0 --- /dev/null +++ b/Spv.Generator/Operand.cs @@ -0,0 +1,14 @@ +using System; +using System.IO; + +namespace Spv.Generator +{ + public interface Operand : IEquatable + { + OperandType Type { get; } + + ushort WordCount { get; } + + void WriteOperand(BinaryWriter writer); + } +} diff --git a/Spv.Generator/OperandType.cs b/Spv.Generator/OperandType.cs new file mode 100644 index 000000000..06e8e1fb3 --- /dev/null +++ b/Spv.Generator/OperandType.cs @@ -0,0 +1,10 @@ +namespace Spv.Generator +{ + public enum OperandType + { + Invalid, + Number, + String, + Instruction, + } +} diff --git a/Spv.Generator/Spv.Generator.csproj b/Spv.Generator/Spv.Generator.csproj new file mode 100644 index 000000000..fff78129b --- /dev/null +++ b/Spv.Generator/Spv.Generator.csproj @@ -0,0 +1,7 @@ + + + + net6.0 + + + diff --git a/Spv.Generator/TypeDeclarationKey.cs b/Spv.Generator/TypeDeclarationKey.cs new file mode 100644 index
000000000..a4aa95634 --- /dev/null +++ b/Spv.Generator/TypeDeclarationKey.cs @@ -0,0 +1,30 @@ +using System; +using System.Diagnostics.CodeAnalysis; + +namespace Spv.Generator +{ + internal struct TypeDeclarationKey : IEquatable + { + private Instruction _typeDeclaration; + + public TypeDeclarationKey(Instruction typeDeclaration) + { + _typeDeclaration = typeDeclaration; + } + + public override int GetHashCode() + { + return DeterministicHashCode.Combine(_typeDeclaration.Opcode, _typeDeclaration.GetHashCodeContent()); + } + + public bool Equals(TypeDeclarationKey other) + { + return _typeDeclaration.Opcode == other._typeDeclaration.Opcode && _typeDeclaration.EqualsContent(other._typeDeclaration); + } + + public override bool Equals([NotNullWhen(true)] object obj) + { + return obj is TypeDeclarationKey && Equals((TypeDeclarationKey)obj); + } + } +} diff --git a/Spv.Generator/spirv.cs b/Spv.Generator/spirv.cs new file mode 100644 index 000000000..df28438bb --- /dev/null +++ b/Spv.Generator/spirv.cs @@ -0,0 +1,1625 @@ +// Copyright (c) 2014-2020 The Khronos Group Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. + +// This header is automatically generated by the same tool that creates +// the Binary Section of the SPIR-V specification. + +// Enumeration tokens for SPIR-V, in various styles: +// C, C++, C++11, JSON, Lua, Python, C#, D +// +// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +// - C# will use enum classes in the Specification class located in the "Spv" namespace, +// e.g.: Spv.Specification.SourceLanguage.GLSL +// - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +// +// Some tokens act like mask values, which can be OR'd together, +// while others are mutually exclusive. The mask-like ones have +// "Mask" in their name, and a parallel enum that has the shift +// amount (1 << x) for each corresponding enumerant. 
+ +namespace Spv +{ + + public static class Specification + { + public const uint MagicNumber = 0x07230203; + public const uint Version = 0x00010500; + public const uint Revision = 4; + public const uint OpCodeMask = 0xffff; + public const uint WordCountShift = 16; + + public enum SourceLanguage + { + Unknown = 0, + ESSL = 1, + GLSL = 2, + OpenCL_C = 3, + OpenCL_CPP = 4, + HLSL = 5, + } + + public enum ExecutionModel + { + Vertex = 0, + TessellationControl = 1, + TessellationEvaluation = 2, + Geometry = 3, + Fragment = 4, + GLCompute = 5, + Kernel = 6, + TaskNV = 5267, + MeshNV = 5268, + RayGenerationKHR = 5313, + RayGenerationNV = 5313, + IntersectionKHR = 5314, + IntersectionNV = 5314, + AnyHitKHR = 5315, + AnyHitNV = 5315, + ClosestHitKHR = 5316, + ClosestHitNV = 5316, + MissKHR = 5317, + MissNV = 5317, + CallableKHR = 5318, + CallableNV = 5318, + } + + public enum AddressingModel + { + Logical = 0, + Physical32 = 1, + Physical64 = 2, + PhysicalStorageBuffer64 = 5348, + PhysicalStorageBuffer64EXT = 5348, + } + + public enum MemoryModel + { + Simple = 0, + GLSL450 = 1, + OpenCL = 2, + Vulkan = 3, + VulkanKHR = 3, + } + + public enum ExecutionMode + { + Invocations = 0, + SpacingEqual = 1, + SpacingFractionalEven = 2, + SpacingFractionalOdd = 3, + VertexOrderCw = 4, + VertexOrderCcw = 5, + PixelCenterInteger = 6, + OriginUpperLeft = 7, + OriginLowerLeft = 8, + EarlyFragmentTests = 9, + PointMode = 10, + Xfb = 11, + DepthReplacing = 12, + DepthGreater = 14, + DepthLess = 15, + DepthUnchanged = 16, + LocalSize = 17, + LocalSizeHint = 18, + InputPoints = 19, + InputLines = 20, + InputLinesAdjacency = 21, + Triangles = 22, + InputTrianglesAdjacency = 23, + Quads = 24, + Isolines = 25, + OutputVertices = 26, + OutputPoints = 27, + OutputLineStrip = 28, + OutputTriangleStrip = 29, + VecTypeHint = 30, + ContractionOff = 31, + Initializer = 33, + Finalizer = 34, + SubgroupSize = 35, + SubgroupsPerWorkgroup = 36, + SubgroupsPerWorkgroupId = 37, + LocalSizeId = 38, + 
LocalSizeHintId = 39, + PostDepthCoverage = 4446, + DenormPreserve = 4459, + DenormFlushToZero = 4460, + SignedZeroInfNanPreserve = 4461, + RoundingModeRTE = 4462, + RoundingModeRTZ = 4463, + StencilRefReplacingEXT = 5027, + OutputLinesNV = 5269, + OutputPrimitivesNV = 5270, + DerivativeGroupQuadsNV = 5289, + DerivativeGroupLinearNV = 5290, + OutputTrianglesNV = 5298, + PixelInterlockOrderedEXT = 5366, + PixelInterlockUnorderedEXT = 5367, + SampleInterlockOrderedEXT = 5368, + SampleInterlockUnorderedEXT = 5369, + ShadingRateInterlockOrderedEXT = 5370, + ShadingRateInterlockUnorderedEXT = 5371, + MaxWorkgroupSizeINTEL = 5893, + MaxWorkDimINTEL = 5894, + NoGlobalOffsetINTEL = 5895, + NumSIMDWorkitemsINTEL = 5896, + } + + public enum StorageClass + { + UniformConstant = 0, + Input = 1, + Uniform = 2, + Output = 3, + Workgroup = 4, + CrossWorkgroup = 5, + Private = 6, + Function = 7, + Generic = 8, + PushConstant = 9, + AtomicCounter = 10, + Image = 11, + StorageBuffer = 12, + CallableDataKHR = 5328, + CallableDataNV = 5328, + IncomingCallableDataKHR = 5329, + IncomingCallableDataNV = 5329, + RayPayloadKHR = 5338, + RayPayloadNV = 5338, + HitAttributeKHR = 5339, + HitAttributeNV = 5339, + IncomingRayPayloadKHR = 5342, + IncomingRayPayloadNV = 5342, + ShaderRecordBufferKHR = 5343, + ShaderRecordBufferNV = 5343, + PhysicalStorageBuffer = 5349, + PhysicalStorageBufferEXT = 5349, + CodeSectionINTEL = 5605, + } + + public enum Dim + { + Dim1D = 0, + Dim2D = 1, + Dim3D = 2, + Cube = 3, + Rect = 4, + Buffer = 5, + SubpassData = 6, + } + + public enum SamplerAddressingMode + { + None = 0, + ClampToEdge = 1, + Clamp = 2, + Repeat = 3, + RepeatMirrored = 4, + } + + public enum SamplerFilterMode + { + Nearest = 0, + Linear = 1, + } + + public enum ImageFormat + { + Unknown = 0, + Rgba32f = 1, + Rgba16f = 2, + R32f = 3, + Rgba8 = 4, + Rgba8Snorm = 5, + Rg32f = 6, + Rg16f = 7, + R11fG11fB10f = 8, + R16f = 9, + Rgba16 = 10, + Rgb10A2 = 11, + Rg16 = 12, + Rg8 = 13, + R16 = 14, + R8 = 
15, + Rgba16Snorm = 16, + Rg16Snorm = 17, + Rg8Snorm = 18, + R16Snorm = 19, + R8Snorm = 20, + Rgba32i = 21, + Rgba16i = 22, + Rgba8i = 23, + R32i = 24, + Rg32i = 25, + Rg16i = 26, + Rg8i = 27, + R16i = 28, + R8i = 29, + Rgba32ui = 30, + Rgba16ui = 31, + Rgba8ui = 32, + R32ui = 33, + Rgb10a2ui = 34, + Rg32ui = 35, + Rg16ui = 36, + Rg8ui = 37, + R16ui = 38, + R8ui = 39, + R64ui = 40, + R64i = 41, + } + + public enum ImageChannelOrder + { + R = 0, + A = 1, + RG = 2, + RA = 3, + RGB = 4, + RGBA = 5, + BGRA = 6, + ARGB = 7, + Intensity = 8, + Luminance = 9, + Rx = 10, + RGx = 11, + RGBx = 12, + Depth = 13, + DepthStencil = 14, + sRGB = 15, + sRGBx = 16, + sRGBA = 17, + sBGRA = 18, + ABGR = 19, + } + + public enum ImageChannelDataType + { + SnormInt8 = 0, + SnormInt16 = 1, + UnormInt8 = 2, + UnormInt16 = 3, + UnormShort565 = 4, + UnormShort555 = 5, + UnormInt101010 = 6, + SignedInt8 = 7, + SignedInt16 = 8, + SignedInt32 = 9, + UnsignedInt8 = 10, + UnsignedInt16 = 11, + UnsignedInt32 = 12, + HalfFloat = 13, + Float = 14, + UnormInt24 = 15, + UnormInt101010_2 = 16, + } + + public enum ImageOperandsShift + { + Bias = 0, + Lod = 1, + Grad = 2, + ConstOffset = 3, + Offset = 4, + ConstOffsets = 5, + Sample = 6, + MinLod = 7, + MakeTexelAvailable = 8, + MakeTexelAvailableKHR = 8, + MakeTexelVisible = 9, + MakeTexelVisibleKHR = 9, + NonPrivateTexel = 10, + NonPrivateTexelKHR = 10, + VolatileTexel = 11, + VolatileTexelKHR = 11, + SignExtend = 12, + ZeroExtend = 13, + } + + public enum ImageOperandsMask + { + MaskNone = 0, + Bias = 0x00000001, + Lod = 0x00000002, + Grad = 0x00000004, + ConstOffset = 0x00000008, + Offset = 0x00000010, + ConstOffsets = 0x00000020, + Sample = 0x00000040, + MinLod = 0x00000080, + MakeTexelAvailable = 0x00000100, + MakeTexelAvailableKHR = 0x00000100, + MakeTexelVisible = 0x00000200, + MakeTexelVisibleKHR = 0x00000200, + NonPrivateTexel = 0x00000400, + NonPrivateTexelKHR = 0x00000400, + VolatileTexel = 0x00000800, + VolatileTexelKHR = 0x00000800, + 
SignExtend = 0x00001000, + ZeroExtend = 0x00002000, + Offsets = 0x00010000, + } + + public enum FPFastMathModeShift + { + NotNaN = 0, + NotInf = 1, + NSZ = 2, + AllowRecip = 3, + Fast = 4, + } + + public enum FPFastMathModeMask + { + MaskNone = 0, + NotNaN = 0x00000001, + NotInf = 0x00000002, + NSZ = 0x00000004, + AllowRecip = 0x00000008, + Fast = 0x00000010, + } + + public enum FPRoundingMode + { + RTE = 0, + RTZ = 1, + RTP = 2, + RTN = 3, + } + + public enum LinkageType + { + Export = 0, + Import = 1, + } + + public enum AccessQualifier + { + ReadOnly = 0, + WriteOnly = 1, + ReadWrite = 2, + } + + public enum FunctionParameterAttribute + { + Zext = 0, + Sext = 1, + ByVal = 2, + Sret = 3, + NoAlias = 4, + NoCapture = 5, + NoWrite = 6, + NoReadWrite = 7, + } + + public enum Decoration + { + RelaxedPrecision = 0, + SpecId = 1, + Block = 2, + BufferBlock = 3, + RowMajor = 4, + ColMajor = 5, + ArrayStride = 6, + MatrixStride = 7, + GLSLShared = 8, + GLSLPacked = 9, + CPacked = 10, + BuiltIn = 11, + NoPerspective = 13, + Flat = 14, + Patch = 15, + Centroid = 16, + Sample = 17, + Invariant = 18, + Restrict = 19, + Aliased = 20, + Volatile = 21, + Constant = 22, + Coherent = 23, + NonWritable = 24, + NonReadable = 25, + Uniform = 26, + UniformId = 27, + SaturatedConversion = 28, + Stream = 29, + Location = 30, + Component = 31, + Index = 32, + Binding = 33, + DescriptorSet = 34, + Offset = 35, + XfbBuffer = 36, + XfbStride = 37, + FuncParamAttr = 38, + FPRoundingMode = 39, + FPFastMathMode = 40, + LinkageAttributes = 41, + NoContraction = 42, + InputAttachmentIndex = 43, + Alignment = 44, + MaxByteOffset = 45, + AlignmentId = 46, + MaxByteOffsetId = 47, + NoSignedWrap = 4469, + NoUnsignedWrap = 4470, + ExplicitInterpAMD = 4999, + OverrideCoverageNV = 5248, + PassthroughNV = 5250, + ViewportRelativeNV = 5252, + SecondaryViewportRelativeNV = 5256, + PerPrimitiveNV = 5271, + PerViewNV = 5272, + PerTaskNV = 5273, + PerVertexNV = 5285, + NonUniform = 5300, + NonUniformEXT = 
5300, + RestrictPointer = 5355, + RestrictPointerEXT = 5355, + AliasedPointer = 5356, + AliasedPointerEXT = 5356, + ReferencedIndirectlyINTEL = 5602, + CounterBuffer = 5634, + HlslCounterBufferGOOGLE = 5634, + HlslSemanticGOOGLE = 5635, + UserSemantic = 5635, + UserTypeGOOGLE = 5636, + RegisterINTEL = 5825, + MemoryINTEL = 5826, + NumbanksINTEL = 5827, + BankwidthINTEL = 5828, + MaxPrivateCopiesINTEL = 5829, + SinglepumpINTEL = 5830, + DoublepumpINTEL = 5831, + MaxReplicatesINTEL = 5832, + SimpleDualPortINTEL = 5833, + MergeINTEL = 5834, + BankBitsINTEL = 5835, + ForcePow2DepthINTEL = 5836, + } + + public enum BuiltIn + { + Position = 0, + PointSize = 1, + ClipDistance = 3, + CullDistance = 4, + VertexId = 5, + InstanceId = 6, + PrimitiveId = 7, + InvocationId = 8, + Layer = 9, + ViewportIndex = 10, + TessLevelOuter = 11, + TessLevelInner = 12, + TessCoord = 13, + PatchVertices = 14, + FragCoord = 15, + PointCoord = 16, + FrontFacing = 17, + SampleId = 18, + SamplePosition = 19, + SampleMask = 20, + FragDepth = 22, + HelperInvocation = 23, + NumWorkgroups = 24, + WorkgroupSize = 25, + WorkgroupId = 26, + LocalInvocationId = 27, + GlobalInvocationId = 28, + LocalInvocationIndex = 29, + WorkDim = 30, + GlobalSize = 31, + EnqueuedWorkgroupSize = 32, + GlobalOffset = 33, + GlobalLinearId = 34, + SubgroupSize = 36, + SubgroupMaxSize = 37, + NumSubgroups = 38, + NumEnqueuedSubgroups = 39, + SubgroupId = 40, + SubgroupLocalInvocationId = 41, + VertexIndex = 42, + InstanceIndex = 43, + SubgroupEqMask = 4416, + SubgroupEqMaskKHR = 4416, + SubgroupGeMask = 4417, + SubgroupGeMaskKHR = 4417, + SubgroupGtMask = 4418, + SubgroupGtMaskKHR = 4418, + SubgroupLeMask = 4419, + SubgroupLeMaskKHR = 4419, + SubgroupLtMask = 4420, + SubgroupLtMaskKHR = 4420, + BaseVertex = 4424, + BaseInstance = 4425, + DrawIndex = 4426, + PrimitiveShadingRateKHR = 4432, + DeviceIndex = 4438, + ViewIndex = 4440, + ShadingRateKHR = 4444, + BaryCoordNoPerspAMD = 4992, + BaryCoordNoPerspCentroidAMD = 4993, 
+ BaryCoordNoPerspSampleAMD = 4994, + BaryCoordSmoothAMD = 4995, + BaryCoordSmoothCentroidAMD = 4996, + BaryCoordSmoothSampleAMD = 4997, + BaryCoordPullModelAMD = 4998, + FragStencilRefEXT = 5014, + ViewportMaskNV = 5253, + SecondaryPositionNV = 5257, + SecondaryViewportMaskNV = 5258, + PositionPerViewNV = 5261, + ViewportMaskPerViewNV = 5262, + FullyCoveredEXT = 5264, + TaskCountNV = 5274, + PrimitiveCountNV = 5275, + PrimitiveIndicesNV = 5276, + ClipDistancePerViewNV = 5277, + CullDistancePerViewNV = 5278, + LayerPerViewNV = 5279, + MeshViewCountNV = 5280, + MeshViewIndicesNV = 5281, + BaryCoordNV = 5286, + BaryCoordNoPerspNV = 5287, + FragSizeEXT = 5292, + FragmentSizeNV = 5292, + FragInvocationCountEXT = 5293, + InvocationsPerPixelNV = 5293, + LaunchIdKHR = 5319, + LaunchIdNV = 5319, + LaunchSizeKHR = 5320, + LaunchSizeNV = 5320, + WorldRayOriginKHR = 5321, + WorldRayOriginNV = 5321, + WorldRayDirectionKHR = 5322, + WorldRayDirectionNV = 5322, + ObjectRayOriginKHR = 5323, + ObjectRayOriginNV = 5323, + ObjectRayDirectionKHR = 5324, + ObjectRayDirectionNV = 5324, + RayTminKHR = 5325, + RayTminNV = 5325, + RayTmaxKHR = 5326, + RayTmaxNV = 5326, + InstanceCustomIndexKHR = 5327, + InstanceCustomIndexNV = 5327, + ObjectToWorldKHR = 5330, + ObjectToWorldNV = 5330, + WorldToObjectKHR = 5331, + WorldToObjectNV = 5331, + HitTNV = 5332, + HitKindKHR = 5333, + HitKindNV = 5333, + IncomingRayFlagsKHR = 5351, + IncomingRayFlagsNV = 5351, + RayGeometryIndexKHR = 5352, + WarpsPerSMNV = 5374, + SMCountNV = 5375, + WarpIDNV = 5376, + SMIDNV = 5377, + } + + public enum SelectionControlShift + { + Flatten = 0, + DontFlatten = 1, + } + + public enum SelectionControlMask + { + MaskNone = 0, + Flatten = 0x00000001, + DontFlatten = 0x00000002, + } + + public enum LoopControlShift + { + Unroll = 0, + DontUnroll = 1, + DependencyInfinite = 2, + DependencyLength = 3, + MinIterations = 4, + MaxIterations = 5, + IterationMultiple = 6, + PeelCount = 7, + PartialCount = 8, + 
InitiationIntervalINTEL = 16, + MaxConcurrencyINTEL = 17, + DependencyArrayINTEL = 18, + PipelineEnableINTEL = 19, + LoopCoalesceINTEL = 20, + MaxInterleavingINTEL = 21, + SpeculatedIterationsINTEL = 22, + } + + public enum LoopControlMask + { + MaskNone = 0, + Unroll = 0x00000001, + DontUnroll = 0x00000002, + DependencyInfinite = 0x00000004, + DependencyLength = 0x00000008, + MinIterations = 0x00000010, + MaxIterations = 0x00000020, + IterationMultiple = 0x00000040, + PeelCount = 0x00000080, + PartialCount = 0x00000100, + InitiationIntervalINTEL = 0x00010000, + MaxConcurrencyINTEL = 0x00020000, + DependencyArrayINTEL = 0x00040000, + PipelineEnableINTEL = 0x00080000, + LoopCoalesceINTEL = 0x00100000, + MaxInterleavingINTEL = 0x00200000, + SpeculatedIterationsINTEL = 0x00400000, + } + + public enum FunctionControlShift + { + Inline = 0, + DontInline = 1, + Pure = 2, + Const = 3, + } + + public enum FunctionControlMask + { + MaskNone = 0, + Inline = 0x00000001, + DontInline = 0x00000002, + Pure = 0x00000004, + Const = 0x00000008, + } + + public enum MemorySemanticsShift + { + Acquire = 1, + Release = 2, + AcquireRelease = 3, + SequentiallyConsistent = 4, + UniformMemory = 6, + SubgroupMemory = 7, + WorkgroupMemory = 8, + CrossWorkgroupMemory = 9, + AtomicCounterMemory = 10, + ImageMemory = 11, + OutputMemory = 12, + OutputMemoryKHR = 12, + MakeAvailable = 13, + MakeAvailableKHR = 13, + MakeVisible = 14, + MakeVisibleKHR = 14, + Volatile = 15, + } + + public enum MemorySemanticsMask + { + MaskNone = 0, + Acquire = 0x00000002, + Release = 0x00000004, + AcquireRelease = 0x00000008, + SequentiallyConsistent = 0x00000010, + UniformMemory = 0x00000040, + SubgroupMemory = 0x00000080, + WorkgroupMemory = 0x00000100, + CrossWorkgroupMemory = 0x00000200, + AtomicCounterMemory = 0x00000400, + ImageMemory = 0x00000800, + OutputMemory = 0x00001000, + OutputMemoryKHR = 0x00001000, + MakeAvailable = 0x00002000, + MakeAvailableKHR = 0x00002000, + MakeVisible = 0x00004000, + 
MakeVisibleKHR = 0x00004000, + Volatile = 0x00008000, + } + + public enum MemoryAccessShift + { + Volatile = 0, + Aligned = 1, + Nontemporal = 2, + MakePointerAvailable = 3, + MakePointerAvailableKHR = 3, + MakePointerVisible = 4, + MakePointerVisibleKHR = 4, + NonPrivatePointer = 5, + NonPrivatePointerKHR = 5, + } + + public enum MemoryAccessMask + { + MaskNone = 0, + Volatile = 0x00000001, + Aligned = 0x00000002, + Nontemporal = 0x00000004, + MakePointerAvailable = 0x00000008, + MakePointerAvailableKHR = 0x00000008, + MakePointerVisible = 0x00000010, + MakePointerVisibleKHR = 0x00000010, + NonPrivatePointer = 0x00000020, + NonPrivatePointerKHR = 0x00000020, + } + + public enum Scope + { + CrossDevice = 0, + Device = 1, + Workgroup = 2, + Subgroup = 3, + Invocation = 4, + QueueFamily = 5, + QueueFamilyKHR = 5, + ShaderCallKHR = 6, + } + + public enum GroupOperation + { + Reduce = 0, + InclusiveScan = 1, + ExclusiveScan = 2, + ClusteredReduce = 3, + PartitionedReduceNV = 6, + PartitionedInclusiveScanNV = 7, + PartitionedExclusiveScanNV = 8, + } + + public enum KernelEnqueueFlags + { + NoWait = 0, + WaitKernel = 1, + WaitWorkGroup = 2, + } + + public enum KernelProfilingInfoShift + { + CmdExecTime = 0, + } + + public enum KernelProfilingInfoMask + { + MaskNone = 0, + CmdExecTime = 0x00000001, + } + + public enum Capability + { + Matrix = 0, + Shader = 1, + Geometry = 2, + Tessellation = 3, + Addresses = 4, + Linkage = 5, + Kernel = 6, + Vector16 = 7, + Float16Buffer = 8, + Float16 = 9, + Float64 = 10, + Int64 = 11, + Int64Atomics = 12, + ImageBasic = 13, + ImageReadWrite = 14, + ImageMipmap = 15, + Pipes = 17, + Groups = 18, + DeviceEnqueue = 19, + LiteralSampler = 20, + AtomicStorage = 21, + Int16 = 22, + TessellationPointSize = 23, + GeometryPointSize = 24, + ImageGatherExtended = 25, + StorageImageMultisample = 27, + UniformBufferArrayDynamicIndexing = 28, + SampledImageArrayDynamicIndexing = 29, + StorageBufferArrayDynamicIndexing = 30, + 
StorageImageArrayDynamicIndexing = 31, + ClipDistance = 32, + CullDistance = 33, + ImageCubeArray = 34, + SampleRateShading = 35, + ImageRect = 36, + SampledRect = 37, + GenericPointer = 38, + Int8 = 39, + InputAttachment = 40, + SparseResidency = 41, + MinLod = 42, + Sampled1D = 43, + Image1D = 44, + SampledCubeArray = 45, + SampledBuffer = 46, + ImageBuffer = 47, + ImageMSArray = 48, + StorageImageExtendedFormats = 49, + ImageQuery = 50, + DerivativeControl = 51, + InterpolationFunction = 52, + TransformFeedback = 53, + GeometryStreams = 54, + StorageImageReadWithoutFormat = 55, + StorageImageWriteWithoutFormat = 56, + MultiViewport = 57, + SubgroupDispatch = 58, + NamedBarrier = 59, + PipeStorage = 60, + GroupNonUniform = 61, + GroupNonUniformVote = 62, + GroupNonUniformArithmetic = 63, + GroupNonUniformBallot = 64, + GroupNonUniformShuffle = 65, + GroupNonUniformShuffleRelative = 66, + GroupNonUniformClustered = 67, + GroupNonUniformQuad = 68, + ShaderLayer = 69, + ShaderViewportIndex = 70, + FragmentShadingRateKHR = 4422, + SubgroupBallotKHR = 4423, + DrawParameters = 4427, + SubgroupVoteKHR = 4431, + StorageBuffer16BitAccess = 4433, + StorageUniformBufferBlock16 = 4433, + StorageUniform16 = 4434, + UniformAndStorageBuffer16BitAccess = 4434, + StoragePushConstant16 = 4435, + StorageInputOutput16 = 4436, + DeviceGroup = 4437, + MultiView = 4439, + VariablePointersStorageBuffer = 4441, + VariablePointers = 4442, + AtomicStorageOps = 4445, + SampleMaskPostDepthCoverage = 4447, + StorageBuffer8BitAccess = 4448, + UniformAndStorageBuffer8BitAccess = 4449, + StoragePushConstant8 = 4450, + DenormPreserve = 4464, + DenormFlushToZero = 4465, + SignedZeroInfNanPreserve = 4466, + RoundingModeRTE = 4467, + RoundingModeRTZ = 4468, + RayQueryProvisionalKHR = 4471, + RayQueryKHR = 4472, + RayTraversalPrimitiveCullingKHR = 4478, + RayTracingKHR = 4479, + Float16ImageAMD = 5008, + ImageGatherBiasLodAMD = 5009, + FragmentMaskAMD = 5010, + StencilExportEXT = 5013, + 
ImageReadWriteLodAMD = 5015, + Int64ImageEXT = 5016, + ShaderClockKHR = 5055, + SampleMaskOverrideCoverageNV = 5249, + GeometryShaderPassthroughNV = 5251, + ShaderViewportIndexLayerEXT = 5254, + ShaderViewportIndexLayerNV = 5254, + ShaderViewportMaskNV = 5255, + ShaderStereoViewNV = 5259, + PerViewAttributesNV = 5260, + FragmentFullyCoveredEXT = 5265, + MeshShadingNV = 5266, + ImageFootprintNV = 5282, + FragmentBarycentricNV = 5284, + ComputeDerivativeGroupQuadsNV = 5288, + FragmentDensityEXT = 5291, + ShadingRateNV = 5291, + GroupNonUniformPartitionedNV = 5297, + ShaderNonUniform = 5301, + ShaderNonUniformEXT = 5301, + RuntimeDescriptorArray = 5302, + RuntimeDescriptorArrayEXT = 5302, + InputAttachmentArrayDynamicIndexing = 5303, + InputAttachmentArrayDynamicIndexingEXT = 5303, + UniformTexelBufferArrayDynamicIndexing = 5304, + UniformTexelBufferArrayDynamicIndexingEXT = 5304, + StorageTexelBufferArrayDynamicIndexing = 5305, + StorageTexelBufferArrayDynamicIndexingEXT = 5305, + UniformBufferArrayNonUniformIndexing = 5306, + UniformBufferArrayNonUniformIndexingEXT = 5306, + SampledImageArrayNonUniformIndexing = 5307, + SampledImageArrayNonUniformIndexingEXT = 5307, + StorageBufferArrayNonUniformIndexing = 5308, + StorageBufferArrayNonUniformIndexingEXT = 5308, + StorageImageArrayNonUniformIndexing = 5309, + StorageImageArrayNonUniformIndexingEXT = 5309, + InputAttachmentArrayNonUniformIndexing = 5310, + InputAttachmentArrayNonUniformIndexingEXT = 5310, + UniformTexelBufferArrayNonUniformIndexing = 5311, + UniformTexelBufferArrayNonUniformIndexingEXT = 5311, + StorageTexelBufferArrayNonUniformIndexing = 5312, + StorageTexelBufferArrayNonUniformIndexingEXT = 5312, + RayTracingNV = 5340, + VulkanMemoryModel = 5345, + VulkanMemoryModelKHR = 5345, + VulkanMemoryModelDeviceScope = 5346, + VulkanMemoryModelDeviceScopeKHR = 5346, + PhysicalStorageBufferAddresses = 5347, + PhysicalStorageBufferAddressesEXT = 5347, + ComputeDerivativeGroupLinearNV = 5350, + 
RayTracingProvisionalKHR = 5353, + CooperativeMatrixNV = 5357, + FragmentShaderSampleInterlockEXT = 5363, + FragmentShaderShadingRateInterlockEXT = 5372, + ShaderSMBuiltinsNV = 5373, + FragmentShaderPixelInterlockEXT = 5378, + DemoteToHelperInvocationEXT = 5379, + SubgroupShuffleINTEL = 5568, + SubgroupBufferBlockIOINTEL = 5569, + SubgroupImageBlockIOINTEL = 5570, + SubgroupImageMediaBlockIOINTEL = 5579, + IntegerFunctions2INTEL = 5584, + FunctionPointersINTEL = 5603, + IndirectReferencesINTEL = 5604, + SubgroupAvcMotionEstimationINTEL = 5696, + SubgroupAvcMotionEstimationIntraINTEL = 5697, + SubgroupAvcMotionEstimationChromaINTEL = 5698, + FPGAMemoryAttributesINTEL = 5824, + UnstructuredLoopControlsINTEL = 5886, + FPGALoopControlsINTEL = 5888, + KernelAttributesINTEL = 5892, + FPGAKernelAttributesINTEL = 5897, + BlockingPipesINTEL = 5945, + FPGARegINTEL = 5948, + AtomicFloat32AddEXT = 6033, + AtomicFloat64AddEXT = 6034, + } + + public enum RayFlagsShift + { + OpaqueKHR = 0, + NoOpaqueKHR = 1, + TerminateOnFirstHitKHR = 2, + SkipClosestHitShaderKHR = 3, + CullBackFacingTrianglesKHR = 4, + CullFrontFacingTrianglesKHR = 5, + CullOpaqueKHR = 6, + CullNoOpaqueKHR = 7, + SkipTrianglesKHR = 8, + SkipAABBsKHR = 9, + } + + public enum RayFlagsMask + { + MaskNone = 0, + OpaqueKHR = 0x00000001, + NoOpaqueKHR = 0x00000002, + TerminateOnFirstHitKHR = 0x00000004, + SkipClosestHitShaderKHR = 0x00000008, + CullBackFacingTrianglesKHR = 0x00000010, + CullFrontFacingTrianglesKHR = 0x00000020, + CullOpaqueKHR = 0x00000040, + CullNoOpaqueKHR = 0x00000080, + SkipTrianglesKHR = 0x00000100, + SkipAABBsKHR = 0x00000200, + } + + public enum RayQueryIntersection + { + RayQueryCandidateIntersectionKHR = 0, + RayQueryCommittedIntersectionKHR = 1, + } + + public enum RayQueryCommittedIntersectionType + { + RayQueryCommittedIntersectionNoneKHR = 0, + RayQueryCommittedIntersectionTriangleKHR = 1, + RayQueryCommittedIntersectionGeneratedKHR = 2, + } + + public enum 
RayQueryCandidateIntersectionType + { + RayQueryCandidateIntersectionTriangleKHR = 0, + RayQueryCandidateIntersectionAABBKHR = 1, + } + + public enum FragmentShadingRateShift + { + Vertical2Pixels = 0, + Vertical4Pixels = 1, + Horizontal2Pixels = 2, + Horizontal4Pixels = 3, + } + + public enum FragmentShadingRateMask + { + MaskNone = 0, + Vertical2Pixels = 0x00000001, + Vertical4Pixels = 0x00000002, + Horizontal2Pixels = 0x00000004, + Horizontal4Pixels = 0x00000008, + } + + public enum Op + { + OpNop = 0, + OpUndef = 1, + OpSourceContinued = 2, + OpSource = 3, + OpSourceExtension = 4, + OpName = 5, + OpMemberName = 6, + OpString = 7, + OpLine = 8, + OpExtension = 10, + OpExtInstImport = 11, + OpExtInst = 12, + OpMemoryModel = 14, + OpEntryPoint = 15, + OpExecutionMode = 16, + OpCapability = 17, + OpTypeVoid = 19, + OpTypeBool = 20, + OpTypeInt = 21, + OpTypeFloat = 22, + OpTypeVector = 23, + OpTypeMatrix = 24, + OpTypeImage = 25, + OpTypeSampler = 26, + OpTypeSampledImage = 27, + OpTypeArray = 28, + OpTypeRuntimeArray = 29, + OpTypeStruct = 30, + OpTypeOpaque = 31, + OpTypePointer = 32, + OpTypeFunction = 33, + OpTypeEvent = 34, + OpTypeDeviceEvent = 35, + OpTypeReserveId = 36, + OpTypeQueue = 37, + OpTypePipe = 38, + OpTypeForwardPointer = 39, + OpConstantTrue = 41, + OpConstantFalse = 42, + OpConstant = 43, + OpConstantComposite = 44, + OpConstantSampler = 45, + OpConstantNull = 46, + OpSpecConstantTrue = 48, + OpSpecConstantFalse = 49, + OpSpecConstant = 50, + OpSpecConstantComposite = 51, + OpSpecConstantOp = 52, + OpFunction = 54, + OpFunctionParameter = 55, + OpFunctionEnd = 56, + OpFunctionCall = 57, + OpVariable = 59, + OpImageTexelPointer = 60, + OpLoad = 61, + OpStore = 62, + OpCopyMemory = 63, + OpCopyMemorySized = 64, + OpAccessChain = 65, + OpInBoundsAccessChain = 66, + OpPtrAccessChain = 67, + OpArrayLength = 68, + OpGenericPtrMemSemantics = 69, + OpInBoundsPtrAccessChain = 70, + OpDecorate = 71, + OpMemberDecorate = 72, + OpDecorationGroup = 73, + 
OpGroupDecorate = 74, + OpGroupMemberDecorate = 75, + OpVectorExtractDynamic = 77, + OpVectorInsertDynamic = 78, + OpVectorShuffle = 79, + OpCompositeConstruct = 80, + OpCompositeExtract = 81, + OpCompositeInsert = 82, + OpCopyObject = 83, + OpTranspose = 84, + OpSampledImage = 86, + OpImageSampleImplicitLod = 87, + OpImageSampleExplicitLod = 88, + OpImageSampleDrefImplicitLod = 89, + OpImageSampleDrefExplicitLod = 90, + OpImageSampleProjImplicitLod = 91, + OpImageSampleProjExplicitLod = 92, + OpImageSampleProjDrefImplicitLod = 93, + OpImageSampleProjDrefExplicitLod = 94, + OpImageFetch = 95, + OpImageGather = 96, + OpImageDrefGather = 97, + OpImageRead = 98, + OpImageWrite = 99, + OpImage = 100, + OpImageQueryFormat = 101, + OpImageQueryOrder = 102, + OpImageQuerySizeLod = 103, + OpImageQuerySize = 104, + OpImageQueryLod = 105, + OpImageQueryLevels = 106, + OpImageQuerySamples = 107, + OpConvertFToU = 109, + OpConvertFToS = 110, + OpConvertSToF = 111, + OpConvertUToF = 112, + OpUConvert = 113, + OpSConvert = 114, + OpFConvert = 115, + OpQuantizeToF16 = 116, + OpConvertPtrToU = 117, + OpSatConvertSToU = 118, + OpSatConvertUToS = 119, + OpConvertUToPtr = 120, + OpPtrCastToGeneric = 121, + OpGenericCastToPtr = 122, + OpGenericCastToPtrExplicit = 123, + OpBitcast = 124, + OpSNegate = 126, + OpFNegate = 127, + OpIAdd = 128, + OpFAdd = 129, + OpISub = 130, + OpFSub = 131, + OpIMul = 132, + OpFMul = 133, + OpUDiv = 134, + OpSDiv = 135, + OpFDiv = 136, + OpUMod = 137, + OpSRem = 138, + OpSMod = 139, + OpFRem = 140, + OpFMod = 141, + OpVectorTimesScalar = 142, + OpMatrixTimesScalar = 143, + OpVectorTimesMatrix = 144, + OpMatrixTimesVector = 145, + OpMatrixTimesMatrix = 146, + OpOuterProduct = 147, + OpDot = 148, + OpIAddCarry = 149, + OpISubBorrow = 150, + OpUMulExtended = 151, + OpSMulExtended = 152, + OpAny = 154, + OpAll = 155, + OpIsNan = 156, + OpIsInf = 157, + OpIsFinite = 158, + OpIsNormal = 159, + OpSignBitSet = 160, + OpLessOrGreater = 161, + OpOrdered = 162, + 
OpUnordered = 163, + OpLogicalEqual = 164, + OpLogicalNotEqual = 165, + OpLogicalOr = 166, + OpLogicalAnd = 167, + OpLogicalNot = 168, + OpSelect = 169, + OpIEqual = 170, + OpINotEqual = 171, + OpUGreaterThan = 172, + OpSGreaterThan = 173, + OpUGreaterThanEqual = 174, + OpSGreaterThanEqual = 175, + OpULessThan = 176, + OpSLessThan = 177, + OpULessThanEqual = 178, + OpSLessThanEqual = 179, + OpFOrdEqual = 180, + OpFUnordEqual = 181, + OpFOrdNotEqual = 182, + OpFUnordNotEqual = 183, + OpFOrdLessThan = 184, + OpFUnordLessThan = 185, + OpFOrdGreaterThan = 186, + OpFUnordGreaterThan = 187, + OpFOrdLessThanEqual = 188, + OpFUnordLessThanEqual = 189, + OpFOrdGreaterThanEqual = 190, + OpFUnordGreaterThanEqual = 191, + OpShiftRightLogical = 194, + OpShiftRightArithmetic = 195, + OpShiftLeftLogical = 196, + OpBitwiseOr = 197, + OpBitwiseXor = 198, + OpBitwiseAnd = 199, + OpNot = 200, + OpBitFieldInsert = 201, + OpBitFieldSExtract = 202, + OpBitFieldUExtract = 203, + OpBitReverse = 204, + OpBitCount = 205, + OpDPdx = 207, + OpDPdy = 208, + OpFwidth = 209, + OpDPdxFine = 210, + OpDPdyFine = 211, + OpFwidthFine = 212, + OpDPdxCoarse = 213, + OpDPdyCoarse = 214, + OpFwidthCoarse = 215, + OpEmitVertex = 218, + OpEndPrimitive = 219, + OpEmitStreamVertex = 220, + OpEndStreamPrimitive = 221, + OpControlBarrier = 224, + OpMemoryBarrier = 225, + OpAtomicLoad = 227, + OpAtomicStore = 228, + OpAtomicExchange = 229, + OpAtomicCompareExchange = 230, + OpAtomicCompareExchangeWeak = 231, + OpAtomicIIncrement = 232, + OpAtomicIDecrement = 233, + OpAtomicIAdd = 234, + OpAtomicISub = 235, + OpAtomicSMin = 236, + OpAtomicUMin = 237, + OpAtomicSMax = 238, + OpAtomicUMax = 239, + OpAtomicAnd = 240, + OpAtomicOr = 241, + OpAtomicXor = 242, + OpPhi = 245, + OpLoopMerge = 246, + OpSelectionMerge = 247, + OpLabel = 248, + OpBranch = 249, + OpBranchConditional = 250, + OpSwitch = 251, + OpKill = 252, + OpReturn = 253, + OpReturnValue = 254, + OpUnreachable = 255, + OpLifetimeStart = 256, + 
OpLifetimeStop = 257, + OpGroupAsyncCopy = 259, + OpGroupWaitEvents = 260, + OpGroupAll = 261, + OpGroupAny = 262, + OpGroupBroadcast = 263, + OpGroupIAdd = 264, + OpGroupFAdd = 265, + OpGroupFMin = 266, + OpGroupUMin = 267, + OpGroupSMin = 268, + OpGroupFMax = 269, + OpGroupUMax = 270, + OpGroupSMax = 271, + OpReadPipe = 274, + OpWritePipe = 275, + OpReservedReadPipe = 276, + OpReservedWritePipe = 277, + OpReserveReadPipePackets = 278, + OpReserveWritePipePackets = 279, + OpCommitReadPipe = 280, + OpCommitWritePipe = 281, + OpIsValidReserveId = 282, + OpGetNumPipePackets = 283, + OpGetMaxPipePackets = 284, + OpGroupReserveReadPipePackets = 285, + OpGroupReserveWritePipePackets = 286, + OpGroupCommitReadPipe = 287, + OpGroupCommitWritePipe = 288, + OpEnqueueMarker = 291, + OpEnqueueKernel = 292, + OpGetKernelNDrangeSubGroupCount = 293, + OpGetKernelNDrangeMaxSubGroupSize = 294, + OpGetKernelWorkGroupSize = 295, + OpGetKernelPreferredWorkGroupSizeMultiple = 296, + OpRetainEvent = 297, + OpReleaseEvent = 298, + OpCreateUserEvent = 299, + OpIsValidEvent = 300, + OpSetUserEventStatus = 301, + OpCaptureEventProfilingInfo = 302, + OpGetDefaultQueue = 303, + OpBuildNDRange = 304, + OpImageSparseSampleImplicitLod = 305, + OpImageSparseSampleExplicitLod = 306, + OpImageSparseSampleDrefImplicitLod = 307, + OpImageSparseSampleDrefExplicitLod = 308, + OpImageSparseSampleProjImplicitLod = 309, + OpImageSparseSampleProjExplicitLod = 310, + OpImageSparseSampleProjDrefImplicitLod = 311, + OpImageSparseSampleProjDrefExplicitLod = 312, + OpImageSparseFetch = 313, + OpImageSparseGather = 314, + OpImageSparseDrefGather = 315, + OpImageSparseTexelsResident = 316, + OpNoLine = 317, + OpAtomicFlagTestAndSet = 318, + OpAtomicFlagClear = 319, + OpImageSparseRead = 320, + OpSizeOf = 321, + OpTypePipeStorage = 322, + OpConstantPipeStorage = 323, + OpCreatePipeFromPipeStorage = 324, + OpGetKernelLocalSizeForSubgroupCount = 325, + OpGetKernelMaxNumSubgroups = 326, + OpTypeNamedBarrier = 327, + 
OpNamedBarrierInitialize = 328, + OpMemoryNamedBarrier = 329, + OpModuleProcessed = 330, + OpExecutionModeId = 331, + OpDecorateId = 332, + OpGroupNonUniformElect = 333, + OpGroupNonUniformAll = 334, + OpGroupNonUniformAny = 335, + OpGroupNonUniformAllEqual = 336, + OpGroupNonUniformBroadcast = 337, + OpGroupNonUniformBroadcastFirst = 338, + OpGroupNonUniformBallot = 339, + OpGroupNonUniformInverseBallot = 340, + OpGroupNonUniformBallotBitExtract = 341, + OpGroupNonUniformBallotBitCount = 342, + OpGroupNonUniformBallotFindLSB = 343, + OpGroupNonUniformBallotFindMSB = 344, + OpGroupNonUniformShuffle = 345, + OpGroupNonUniformShuffleXor = 346, + OpGroupNonUniformShuffleUp = 347, + OpGroupNonUniformShuffleDown = 348, + OpGroupNonUniformIAdd = 349, + OpGroupNonUniformFAdd = 350, + OpGroupNonUniformIMul = 351, + OpGroupNonUniformFMul = 352, + OpGroupNonUniformSMin = 353, + OpGroupNonUniformUMin = 354, + OpGroupNonUniformFMin = 355, + OpGroupNonUniformSMax = 356, + OpGroupNonUniformUMax = 357, + OpGroupNonUniformFMax = 358, + OpGroupNonUniformBitwiseAnd = 359, + OpGroupNonUniformBitwiseOr = 360, + OpGroupNonUniformBitwiseXor = 361, + OpGroupNonUniformLogicalAnd = 362, + OpGroupNonUniformLogicalOr = 363, + OpGroupNonUniformLogicalXor = 364, + OpGroupNonUniformQuadBroadcast = 365, + OpGroupNonUniformQuadSwap = 366, + OpCopyLogical = 400, + OpPtrEqual = 401, + OpPtrNotEqual = 402, + OpPtrDiff = 403, + OpTerminateInvocation = 4416, + OpSubgroupBallotKHR = 4421, + OpSubgroupFirstInvocationKHR = 4422, + OpSubgroupAllKHR = 4428, + OpSubgroupAnyKHR = 4429, + OpSubgroupAllEqualKHR = 4430, + OpSubgroupReadInvocationKHR = 4432, + OpTraceRayKHR = 4445, + OpExecuteCallableKHR = 4446, + OpConvertUToAccelerationStructureKHR = 4447, + OpIgnoreIntersectionKHR = 4448, + OpTerminateRayKHR = 4449, + OpTypeRayQueryKHR = 4472, + OpRayQueryInitializeKHR = 4473, + OpRayQueryTerminateKHR = 4474, + OpRayQueryGenerateIntersectionKHR = 4475, + OpRayQueryConfirmIntersectionKHR = 4476, + 
OpRayQueryProceedKHR = 4477, + OpRayQueryGetIntersectionTypeKHR = 4479, + OpGroupIAddNonUniformAMD = 5000, + OpGroupFAddNonUniformAMD = 5001, + OpGroupFMinNonUniformAMD = 5002, + OpGroupUMinNonUniformAMD = 5003, + OpGroupSMinNonUniformAMD = 5004, + OpGroupFMaxNonUniformAMD = 5005, + OpGroupUMaxNonUniformAMD = 5006, + OpGroupSMaxNonUniformAMD = 5007, + OpFragmentMaskFetchAMD = 5011, + OpFragmentFetchAMD = 5012, + OpReadClockKHR = 5056, + OpImageSampleFootprintNV = 5283, + OpGroupNonUniformPartitionNV = 5296, + OpWritePackedPrimitiveIndices4x8NV = 5299, + OpReportIntersectionKHR = 5334, + OpReportIntersectionNV = 5334, + OpIgnoreIntersectionNV = 5335, + OpTerminateRayNV = 5336, + OpTraceNV = 5337, + OpTypeAccelerationStructureKHR = 5341, + OpTypeAccelerationStructureNV = 5341, + OpExecuteCallableNV = 5344, + OpTypeCooperativeMatrixNV = 5358, + OpCooperativeMatrixLoadNV = 5359, + OpCooperativeMatrixStoreNV = 5360, + OpCooperativeMatrixMulAddNV = 5361, + OpCooperativeMatrixLengthNV = 5362, + OpBeginInvocationInterlockEXT = 5364, + OpEndInvocationInterlockEXT = 5365, + OpDemoteToHelperInvocationEXT = 5380, + OpIsHelperInvocationEXT = 5381, + OpSubgroupShuffleINTEL = 5571, + OpSubgroupShuffleDownINTEL = 5572, + OpSubgroupShuffleUpINTEL = 5573, + OpSubgroupShuffleXorINTEL = 5574, + OpSubgroupBlockReadINTEL = 5575, + OpSubgroupBlockWriteINTEL = 5576, + OpSubgroupImageBlockReadINTEL = 5577, + OpSubgroupImageBlockWriteINTEL = 5578, + OpSubgroupImageMediaBlockReadINTEL = 5580, + OpSubgroupImageMediaBlockWriteINTEL = 5581, + OpUCountLeadingZerosINTEL = 5585, + OpUCountTrailingZerosINTEL = 5586, + OpAbsISubINTEL = 5587, + OpAbsUSubINTEL = 5588, + OpIAddSatINTEL = 5589, + OpUAddSatINTEL = 5590, + OpIAverageINTEL = 5591, + OpUAverageINTEL = 5592, + OpIAverageRoundedINTEL = 5593, + OpUAverageRoundedINTEL = 5594, + OpISubSatINTEL = 5595, + OpUSubSatINTEL = 5596, + OpIMul32x16INTEL = 5597, + OpUMul32x16INTEL = 5598, + OpFunctionPointerINTEL = 5600, + OpFunctionPointerCallINTEL = 
5601, + OpDecorateString = 5632, + OpDecorateStringGOOGLE = 5632, + OpMemberDecorateString = 5633, + OpMemberDecorateStringGOOGLE = 5633, + OpVmeImageINTEL = 5699, + OpTypeVmeImageINTEL = 5700, + OpTypeAvcImePayloadINTEL = 5701, + OpTypeAvcRefPayloadINTEL = 5702, + OpTypeAvcSicPayloadINTEL = 5703, + OpTypeAvcMcePayloadINTEL = 5704, + OpTypeAvcMceResultINTEL = 5705, + OpTypeAvcImeResultINTEL = 5706, + OpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + OpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + OpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + OpTypeAvcImeDualReferenceStreaminINTEL = 5710, + OpTypeAvcRefResultINTEL = 5711, + OpTypeAvcSicResultINTEL = 5712, + OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, + OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, + OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, + OpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, + OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, + OpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, + OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, + OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, + OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, + OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, + OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, + OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, + OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, + OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, + OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, + OpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, + OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, + OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, + OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, + OpSubgroupAvcMceConvertToImePayloadINTEL = 5732, + OpSubgroupAvcMceConvertToImeResultINTEL = 5733, + 
OpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, + OpSubgroupAvcMceConvertToRefResultINTEL = 5735, + OpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, + OpSubgroupAvcMceConvertToSicResultINTEL = 5737, + OpSubgroupAvcMceGetMotionVectorsINTEL = 5738, + OpSubgroupAvcMceGetInterDistortionsINTEL = 5739, + OpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, + OpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, + OpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, + OpSubgroupAvcMceGetInterDirectionsINTEL = 5743, + OpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, + OpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, + OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, + OpSubgroupAvcImeInitializeINTEL = 5747, + OpSubgroupAvcImeSetSingleReferenceINTEL = 5748, + OpSubgroupAvcImeSetDualReferenceINTEL = 5749, + OpSubgroupAvcImeRefWindowSizeINTEL = 5750, + OpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, + OpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, + OpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, + OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, + OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, + OpSubgroupAvcImeSetWeightedSadINTEL = 5756, + OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, + OpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, + OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, + OpSubgroupAvcImeConvertToMceResultINTEL = 5765, + OpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, + OpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, + OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, + OpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, 
+ OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, + OpSubgroupAvcImeGetBorderReachedINTEL = 5776, + OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, + OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, + OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, + OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, + OpSubgroupAvcFmeInitializeINTEL = 5781, + OpSubgroupAvcBmeInitializeINTEL = 5782, + OpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, + OpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, + OpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, + OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, + OpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, + OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, + OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, + OpSubgroupAvcRefConvertToMceResultINTEL = 5790, + OpSubgroupAvcSicInitializeINTEL = 5791, + OpSubgroupAvcSicConfigureSkcINTEL = 5792, + OpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, + OpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, + OpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, + OpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, + OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, + OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, + OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, + OpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, + OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, + OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, + 
OpSubgroupAvcSicEvaluateIpeINTEL = 5803, + OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, + OpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, + OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, + OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, + OpSubgroupAvcSicConvertToMceResultINTEL = 5808, + OpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, + OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, + OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, + OpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, + OpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, + OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, + OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, + OpSubgroupAvcSicGetInterRawSadsINTEL = 5816, + OpLoopControlINTEL = 5887, + OpReadPipeBlockingINTEL = 5946, + OpWritePipeBlockingINTEL = 5947, + OpFPGARegINTEL = 5949, + OpRayQueryGetRayTMinKHR = 6016, + OpRayQueryGetRayFlagsKHR = 6017, + OpRayQueryGetIntersectionTKHR = 6018, + OpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019, + OpRayQueryGetIntersectionInstanceIdKHR = 6020, + OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021, + OpRayQueryGetIntersectionGeometryIndexKHR = 6022, + OpRayQueryGetIntersectionPrimitiveIndexKHR = 6023, + OpRayQueryGetIntersectionBarycentricsKHR = 6024, + OpRayQueryGetIntersectionFrontFaceKHR = 6025, + OpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026, + OpRayQueryGetIntersectionObjectRayDirectionKHR = 6027, + OpRayQueryGetIntersectionObjectRayOriginKHR = 6028, + OpRayQueryGetWorldRayDirectionKHR = 6029, + OpRayQueryGetWorldRayOriginKHR = 6030, + OpRayQueryGetIntersectionObjectToWorldKHR = 6031, + OpRayQueryGetIntersectionWorldToObjectKHR = 6032, + OpAtomicFAddEXT = 6035, + } + } +} +