diff --git a/Ryujinx.Graphics.GAL/Capabilities.cs b/Ryujinx.Graphics.GAL/Capabilities.cs index bbced2dde..d7388476b 100644 --- a/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/Ryujinx.Graphics.GAL/Capabilities.cs @@ -11,6 +11,9 @@ namespace Ryujinx.Graphics.GAL public readonly bool HasVectorIndexingBug; public readonly bool SupportsAstcCompression; + public readonly bool SupportsBc123Compression; + public readonly bool SupportsBc45Compression; + public readonly bool SupportsBc67Compression; public readonly bool Supports3DTextureCompression; public readonly bool SupportsBgraFormat; public readonly bool SupportsR4G4Format; @@ -40,6 +43,9 @@ namespace Ryujinx.Graphics.GAL bool hasFrontFacingBug, bool hasVectorIndexingBug, bool supportsAstcCompression, + bool supportsBc123Compression, + bool supportsBc45Compression, + bool supportsBc67Compression, bool supports3DTextureCompression, bool supportsBgraFormat, bool supportsR4G4Format, @@ -66,6 +72,9 @@ namespace Ryujinx.Graphics.GAL HasFrontFacingBug = hasFrontFacingBug; HasVectorIndexingBug = hasVectorIndexingBug; SupportsAstcCompression = supportsAstcCompression; + SupportsBc123Compression = supportsBc123Compression; + SupportsBc45Compression = supportsBc45Compression; + SupportsBc67Compression = supportsBc67Compression; Supports3DTextureCompression = supports3DTextureCompression; SupportsBgraFormat = supportsBgraFormat; SupportsR4G4Format = supportsR4G4Format; diff --git a/Ryujinx.Graphics.Gpu/Image/Texture.cs b/Ryujinx.Graphics.Gpu/Image/Texture.cs index 2ef76b686..0a578c924 100644 --- a/Ryujinx.Graphics.Gpu/Image/Texture.cs +++ b/Ryujinx.Graphics.Gpu/Image/Texture.cs @@ -844,7 +844,7 @@ namespace Ryujinx.Graphics.Gpu.Image { data = PixelConverter.ConvertR4G4ToR4G4B4A4(data); } - else if (!_context.Capabilities.Supports3DTextureCompression && Target == Target.Texture3D) + else if (!TextureCompatibility.HostSupportsBcFormat(Format, Target, _context.Capabilities)) { switch (Format) { @@ -868,6 +868,14 @@ namespace 
Ryujinx.Graphics.Gpu.Image case Format.Bc5Unorm: data = BCnDecoder.DecodeBC5(data, width, height, depth, levels, layers, Format == Format.Bc5Snorm); break; + case Format.Bc6HSfloat: + case Format.Bc6HUfloat: + data = BCnDecoder.DecodeBC6(data, width, height, depth, levels, layers, Format == Format.Bc6HSfloat); + break; + case Format.Bc7Srgb: + case Format.Bc7Unorm: + data = BCnDecoder.DecodeBC7(data, width, height, depth, levels, layers); + break; } } diff --git a/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs b/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs index 62cd456db..ea9801bbb 100644 --- a/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs +++ b/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs @@ -88,9 +88,9 @@ namespace Ryujinx.Graphics.Gpu.Image return new FormatInfo(Format.R4G4B4A4Unorm, 1, 1, 2, 4); } - if (!caps.Supports3DTextureCompression && info.Target == Target.Texture3D) + if (!HostSupportsBcFormat(info.FormatInfo.Format, info.Target, caps)) { - // The host API does not support 3D compressed formats. + // The host API does not support this compressed format. // We assume software decompression will be done for those textures, // and so we adjust the format here to match the decompressor output. 
switch (info.FormatInfo.Format) @@ -98,10 +98,12 @@ namespace Ryujinx.Graphics.Gpu.Image case Format.Bc1RgbaSrgb: case Format.Bc2Srgb: case Format.Bc3Srgb: + case Format.Bc7Srgb: return new FormatInfo(Format.R8G8B8A8Srgb, 1, 1, 4, 4); case Format.Bc1RgbaUnorm: case Format.Bc2Unorm: case Format.Bc3Unorm: + case Format.Bc7Unorm: return new FormatInfo(Format.R8G8B8A8Unorm, 1, 1, 4, 4); case Format.Bc4Unorm: return new FormatInfo(Format.R8Unorm, 1, 1, 1, 1); @@ -111,12 +113,50 @@ namespace Ryujinx.Graphics.Gpu.Image return new FormatInfo(Format.R8G8Unorm, 1, 1, 2, 2); case Format.Bc5Snorm: return new FormatInfo(Format.R8G8Snorm, 1, 1, 2, 2); + case Format.Bc6HSfloat: + case Format.Bc6HUfloat: + return new FormatInfo(Format.R16G16B16A16Float, 1, 1, 8, 4); } } return info.FormatInfo; } + /// + /// Checks if the host API supports a given texture compression format of the BC family. + /// + /// BC format to be checked + /// Target usage of the texture + /// Host GPU Capabilities + /// True if the texture host supports the format with the given target usage, false otherwise + public static bool HostSupportsBcFormat(Format format, Target target, Capabilities caps) + { + bool not3DOr3DCompressionSupported = target != Target.Texture3D || caps.Supports3DTextureCompression; + + switch (format) + { + case Format.Bc1RgbaSrgb: + case Format.Bc1RgbaUnorm: + case Format.Bc2Srgb: + case Format.Bc2Unorm: + case Format.Bc3Srgb: + case Format.Bc3Unorm: + return caps.SupportsBc123Compression && not3DOr3DCompressionSupported; + case Format.Bc4Unorm: + case Format.Bc4Snorm: + case Format.Bc5Unorm: + case Format.Bc5Snorm: + return caps.SupportsBc45Compression && not3DOr3DCompressionSupported; + case Format.Bc6HSfloat: + case Format.Bc6HUfloat: + case Format.Bc7Srgb: + case Format.Bc7Unorm: + return caps.SupportsBc67Compression && not3DOr3DCompressionSupported; + } + + return true; + } + /// /// Determines whether a texture can flush its data back to guest memory. 
/// @@ -748,7 +788,7 @@ namespace Ryujinx.Graphics.Gpu.Image /// True if the texture target and samples count matches, false otherwise public static bool TargetAndSamplesCompatible(TextureInfo lhs, TextureInfo rhs) { - return lhs.Target == rhs.Target && + return lhs.Target == rhs.Target && lhs.SamplesInX == rhs.SamplesInX && lhs.SamplesInY == rhs.SamplesInY; } diff --git a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs index c20eccf8f..1e36b1cf9 100644 --- a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs +++ b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs @@ -17,6 +17,9 @@ namespace Ryujinx.Graphics.OpenGL private static readonly Lazy _supportsQuads = new Lazy(SupportsQuadsCheck); private static readonly Lazy _supportsSeamlessCubemapPerTexture = new Lazy(() => HasExtension("GL_ARB_seamless_cubemap_per_texture")); private static readonly Lazy _supportsShaderBallot = new Lazy(() => HasExtension("GL_ARB_shader_ballot")); + private static readonly Lazy _supportsTextureCompressionBptc = new Lazy(() => HasExtension("GL_EXT_texture_compression_bptc")); + private static readonly Lazy _supportsTextureCompressionRgtc = new Lazy(() => HasExtension("GL_EXT_texture_compression_rgtc")); + private static readonly Lazy _supportsTextureCompressionS3tc = new Lazy(() => HasExtension("GL_EXT_texture_compression_s3tc")); private static readonly Lazy _supportsTextureShadowLod = new Lazy(() => HasExtension("GL_EXT_texture_shadow_lod")); private static readonly Lazy _supportsViewportSwizzle = new Lazy(() => HasExtension("GL_NV_viewport_swizzle")); @@ -56,6 +59,9 @@ namespace Ryujinx.Graphics.OpenGL public static bool SupportsQuads => _supportsQuads.Value; public static bool SupportsSeamlessCubemapPerTexture => _supportsSeamlessCubemapPerTexture.Value; public static bool SupportsShaderBallot => _supportsShaderBallot.Value; + public static bool SupportsTextureCompressionBptc => _supportsTextureCompressionBptc.Value; + public static bool 
SupportsTextureCompressionRgtc => _supportsTextureCompressionRgtc.Value; + public static bool SupportsTextureCompressionS3tc => _supportsTextureCompressionS3tc.Value; public static bool SupportsTextureShadowLod => _supportsTextureShadowLod.Value; public static bool SupportsViewportSwizzle => _supportsViewportSwizzle.Value; diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs index 78335a133..67d685f6c 100644 --- a/Ryujinx.Graphics.OpenGL/Renderer.cs +++ b/Ryujinx.Graphics.OpenGL/Renderer.cs @@ -105,6 +105,9 @@ namespace Ryujinx.Graphics.OpenGL hasFrontFacingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows, hasVectorIndexingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows, supportsAstcCompression: HwCapabilities.SupportsAstcCompression, + supportsBc123Compression: HwCapabilities.SupportsTextureCompressionS3tc, + supportsBc45Compression: HwCapabilities.SupportsTextureCompressionRgtc, + supportsBc67Compression: true, // Should check BPTC extension, but for some reason NVIDIA is not exposing the extension. 
supports3DTextureCompression: false, supportsBgraFormat: false, supportsR4G4Format: false, diff --git a/Ryujinx.Graphics.Texture/BC6Decoder.cs b/Ryujinx.Graphics.Texture/BC6Decoder.cs new file mode 100644 index 000000000..819bf022f --- /dev/null +++ b/Ryujinx.Graphics.Texture/BC6Decoder.cs @@ -0,0 +1,819 @@ +using Ryujinx.Graphics.Texture.Utils; +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Texture +{ + static class BC6Decoder + { + private const int HalfOne = 0x3C00; + + public static void Decode(Span output, ReadOnlySpan data, int width, int height, bool signed) + { + ReadOnlySpan blocks = MemoryMarshal.Cast(data); + + Span output64 = MemoryMarshal.Cast(output); + + int wInBlocks = (width + 3) / 4; + int hInBlocks = (height + 3) / 4; + + for (int y = 0; y < hInBlocks; y++) + { + int y2 = y * 4; + int bh = Math.Min(4, height - y2); + + for (int x = 0; x < wInBlocks; x++) + { + int x2 = x * 4; + int bw = Math.Min(4, width - x2); + + DecodeBlock(blocks[y * wInBlocks + x], output64.Slice(y2 * width + x2), bw, bh, width, signed); + } + } + } + + private static void DecodeBlock(Block block, Span output, int w, int h, int width, bool signed) + { + int mode = (int)(block.Low & 3); + if ((mode & 2) != 0) + { + mode = (int)(block.Low & 0x1f); + } + + Span endPoints = stackalloc RgbaColor32[4]; + int subsetCount = DecodeEndPoints(ref block, endPoints, mode, signed); + if (subsetCount == 0) + { + // Mode is invalid, the spec mandates that hardware fills the block with + // an opaque black color. 
+ for (int ty = 0; ty < h; ty++) + { + int baseOffs = ty * width; + + for (int tx = 0; tx < w; tx++) + { + output[baseOffs + tx] = (ulong)HalfOne << 48; + } + } + + return; + } + + int partition; + int indexBitCount; + ulong indices; + + if (subsetCount > 1) + { + partition = (int)((block.High >> 13) & 0x1F); + indexBitCount = 3; + + int fixUpIndex = BC67Tables.FixUpIndices[subsetCount - 1][partition][1] * 3; + ulong lowMask = (ulong.MaxValue >> (65 - fixUpIndex)) << 3; + ulong highMask = ulong.MaxValue << (fixUpIndex + 3); + + indices = ((block.High >> 16) & highMask) | ((block.High >> 17) & lowMask) | ((block.High >> 18) & 3); + } + else + { + partition = 0; + indexBitCount = 4; + indices = (block.High & ~0xFUL) | ((block.High >> 1) & 7); + } + + ulong indexMask = (1UL << indexBitCount) - 1; + + for (int ty = 0; ty < h; ty++) + { + int baseOffs = ty * width; + + for (int tx = 0; tx < w; tx++) + { + int offs = baseOffs + tx; + int index = (int)(indices & indexMask); + int endPointBase = BC67Tables.PartitionTable[subsetCount - 1][partition][ty * 4 + tx] << 1; + + RgbaColor32 color1 = endPoints[endPointBase]; + RgbaColor32 color2 = endPoints[endPointBase + 1]; + + RgbaColor32 color = BC67Utils.Interpolate(color1, color2, index, indexBitCount); + + output[offs] = + (ulong)FinishUnquantize(color.R, signed) | + ((ulong)FinishUnquantize(color.G, signed) << 16) | + ((ulong)FinishUnquantize(color.B, signed) << 32) | + ((ulong)HalfOne << 48); + + indices >>= indexBitCount; + } + } + } + + private static int DecodeEndPoints(ref Block block, Span endPoints, int mode, bool signed) + { + ulong low = block.Low; + ulong high = block.High; + + int r0 = 0, g0 = 0, b0 = 0, r1 = 0, g1 = 0, b1 = 0, r2 = 0, g2 = 0, b2 = 0, r3 = 0, g3 = 0, b3 = 0; + int subsetCount; + + switch (mode) + { + case 0: + r0 = (int)(low >> 5) & 0x3FF; + g0 = (int)(low >> 15) & 0x3FF; + b0 = (int)(low >> 25) & 0x3FF; + + if (signed) + { + r0 = SignExtend(r0, 10); + g0 = SignExtend(g0, 10); + b0 = 
SignExtend(b0, 10); + } + + r1 = r0 + SignExtend((int)(low >> 35), 5); + g1 = g0 + SignExtend((int)(low >> 45), 5); + b1 = b0 + SignExtend((int)(low >> 55), 5); + + r2 = r0 + SignExtend((int)(high >> 1), 5); + g2 = g0 + SignExtend((int)(((low << 2) & 0x10) | ((low >> 41) & 0xF)), 5); + b2 = b0 + SignExtend((int)(((low << 1) & 0x10) | ((high << 3) & 0x08) | (low >> 61)), 5); + + r3 = r0 + SignExtend((int)(high >> 7), 5); + g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0xF)), 5); + b3 = b0 + SignExtend((int)( + ((low) & 0x10) | + ((high >> 9) & 0x08) | + ((high >> 4) & 0x04) | + ((low >> 59) & 0x02) | + ((low >> 50) & 0x01)), 5); + + r0 = Unquantize(r0, 10, signed); + g0 = Unquantize(g0, 10, signed); + b0 = Unquantize(b0, 10, signed); + + r1 = Unquantize(r1 & 0x3FF, 10, signed); + g1 = Unquantize(g1 & 0x3FF, 10, signed); + b1 = Unquantize(b1 & 0x3FF, 10, signed); + + r2 = Unquantize(r2 & 0x3FF, 10, signed); + g2 = Unquantize(g2 & 0x3FF, 10, signed); + b2 = Unquantize(b2 & 0x3FF, 10, signed); + + r3 = Unquantize(r3 & 0x3FF, 10, signed); + g3 = Unquantize(g3 & 0x3FF, 10, signed); + b3 = Unquantize(b3 & 0x3FF, 10, signed); + + subsetCount = 2; + break; + case 1: + r0 = (int)(low >> 5) & 0x7F; + g0 = (int)(low >> 15) & 0x7F; + b0 = (int)(low >> 25) & 0x7F; + + if (signed) + { + r0 = SignExtend(r0, 7); + g0 = SignExtend(g0, 7); + b0 = SignExtend(b0, 7); + } + + r1 = r0 + SignExtend((int)(low >> 35), 6); + g1 = g0 + SignExtend((int)(low >> 45), 6); + b1 = b0 + SignExtend((int)(low >> 55), 6); + + r2 = r0 + SignExtend((int)(high >> 1), 6); + g2 = g0 + SignExtend((int)(((low << 3) & 0x20) | ((low >> 20) & 0x10) | ((low >> 41) & 0x0F)), 6); + b2 = b0 + SignExtend((int)( + ((low >> 17) & 0x20) | + ((low >> 10) & 0x10) | + ((high << 3) & 0x08) | + (low >> 61)), 6); + + r3 = r0 + SignExtend((int)(high >> 7), 6); + g3 = g0 + SignExtend((int)(((low << 1) & 0x30) | ((low >> 51) & 0xF)), 6); + b3 = b0 + SignExtend((int)( + ((low >> 28) & 0x20) | + ((low >> 30) & 
0x10) | + ((low >> 29) & 0x08) | + ((low >> 21) & 0x04) | + ((low >> 12) & 0x03)), 6); + + r0 = Unquantize(r0, 7, signed); + g0 = Unquantize(g0, 7, signed); + b0 = Unquantize(b0, 7, signed); + + r1 = Unquantize(r1 & 0x7F, 7, signed); + g1 = Unquantize(g1 & 0x7F, 7, signed); + b1 = Unquantize(b1 & 0x7F, 7, signed); + + r2 = Unquantize(r2 & 0x7F, 7, signed); + g2 = Unquantize(g2 & 0x7F, 7, signed); + b2 = Unquantize(b2 & 0x7F, 7, signed); + + r3 = Unquantize(r3 & 0x7F, 7, signed); + g3 = Unquantize(g3 & 0x7F, 7, signed); + b3 = Unquantize(b3 & 0x7F, 7, signed); + + subsetCount = 2; + break; + case 2: + r0 = (int)(((low >> 30) & 0x400) | ((low >> 5) & 0x3FF)); + g0 = (int)(((low >> 39) & 0x400) | ((low >> 15) & 0x3FF)); + b0 = (int)(((low >> 49) & 0x400) | ((low >> 25) & 0x3FF)); + + if (signed) + { + r0 = SignExtend(r0, 11); + g0 = SignExtend(g0, 11); + b0 = SignExtend(b0, 11); + } + + r1 = r0 + SignExtend((int)(low >> 35), 5); + g1 = g0 + SignExtend((int)(low >> 45), 4); + b1 = b0 + SignExtend((int)(low >> 55), 4); + + r2 = r0 + SignExtend((int)(high >> 1), 5); + g2 = g0 + SignExtend((int)(low >> 41), 4); + b2 = b0 + SignExtend((int)(((high << 3) & 8) | (low >> 61)), 4); + + r3 = r0 + SignExtend((int)(high >> 7), 5); + g3 = g0 + SignExtend((int)(low >> 51), 4); + b3 = b0 + SignExtend((int)( + ((high >> 9) & 8) | + ((high >> 4) & 4) | + ((low >> 59) & 2) | + ((low >> 50) & 1)), 4); + + r0 = Unquantize(r0, 11, signed); + g0 = Unquantize(g0, 11, signed); + b0 = Unquantize(b0, 11, signed); + + r1 = Unquantize(r1 & 0x7FF, 11, signed); + g1 = Unquantize(g1 & 0x7FF, 11, signed); + b1 = Unquantize(b1 & 0x7FF, 11, signed); + + r2 = Unquantize(r2 & 0x7FF, 11, signed); + g2 = Unquantize(g2 & 0x7FF, 11, signed); + b2 = Unquantize(b2 & 0x7FF, 11, signed); + + r3 = Unquantize(r3 & 0x7FF, 11, signed); + g3 = Unquantize(g3 & 0x7FF, 11, signed); + b3 = Unquantize(b3 & 0x7FF, 11, signed); + + subsetCount = 2; + break; + case 3: + r0 = (int)(low >> 5) & 0x3FF; + g0 = (int)(low >> 15) 
& 0x3FF; + b0 = (int)(low >> 25) & 0x3FF; + + r1 = (int)(low >> 35) & 0x3FF; + g1 = (int)(low >> 45) & 0x3FF; + b1 = (int)(((high << 9) & 0x200) | (low >> 55)); + + if (signed) + { + r0 = SignExtend(r0, 10); + g0 = SignExtend(g0, 10); + b0 = SignExtend(b0, 10); + + r1 = SignExtend(r1, 10); + g1 = SignExtend(g1, 10); + b1 = SignExtend(b1, 10); + } + + r0 = Unquantize(r0, 10, signed); + g0 = Unquantize(g0, 10, signed); + b0 = Unquantize(b0, 10, signed); + + r1 = Unquantize(r1, 10, signed); + g1 = Unquantize(g1, 10, signed); + b1 = Unquantize(b1, 10, signed); + + subsetCount = 1; + break; + case 6: + r0 = (int)(((low >> 29) & 0x400) | ((low >> 5) & 0x3FF)); + g0 = (int)(((low >> 40) & 0x400) | ((low >> 15) & 0x3FF)); + b0 = (int)(((low >> 49) & 0x400) | ((low >> 25) & 0x3FF)); + + if (signed) + { + r0 = SignExtend(r0, 11); + g0 = SignExtend(g0, 11); + b0 = SignExtend(b0, 11); + } + + r1 = r0 + SignExtend((int)(low >> 35), 4); + g1 = g0 + SignExtend((int)(low >> 45), 5); + b1 = b0 + SignExtend((int)(low >> 55), 4); + + r2 = r0 + SignExtend((int)(high >> 1), 4); + g2 = g0 + SignExtend((int)(((high >> 7) & 0x10) | ((low >> 41) & 0x0F)), 5); + b2 = b0 + SignExtend((int)(((high << 3) & 0x08) | ((low >> 61))), 4); + + r3 = r0 + SignExtend((int)(high >> 7), 4); + g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0x0F)), 5); + b3 = b0 + SignExtend((int)( + ((high >> 9) & 8) | + ((high >> 4) & 4) | + ((low >> 59) & 2) | + ((high >> 5) & 1)), 4); + + r0 = Unquantize(r0, 11, signed); + g0 = Unquantize(g0, 11, signed); + b0 = Unquantize(b0, 11, signed); + + r1 = Unquantize(r1 & 0x7FF, 11, signed); + g1 = Unquantize(g1 & 0x7FF, 11, signed); + b1 = Unquantize(b1 & 0x7FF, 11, signed); + + r2 = Unquantize(r2 & 0x7FF, 11, signed); + g2 = Unquantize(g2 & 0x7FF, 11, signed); + b2 = Unquantize(b2 & 0x7FF, 11, signed); + + r3 = Unquantize(r3 & 0x7FF, 11, signed); + g3 = Unquantize(g3 & 0x7FF, 11, signed); + b3 = Unquantize(b3 & 0x7FF, 11, signed); + + subsetCount = 2; + 
break; + case 7: + r0 = (int)(((low >> 34) & 0x400) | ((low >> 5) & 0x3FF)); + g0 = (int)(((low >> 44) & 0x400) | ((low >> 15) & 0x3FF)); + b0 = (int)(((high << 10) & 0x400) | ((low >> 25) & 0x3FF)); + + if (signed) + { + r0 = SignExtend(r0, 11); + g0 = SignExtend(g0, 11); + b0 = SignExtend(b0, 11); + } + + r1 = (r0 + SignExtend((int)(low >> 35), 9)) & 0x7FF; + g1 = (g0 + SignExtend((int)(low >> 45), 9)) & 0x7FF; + b1 = (b0 + SignExtend((int)(low >> 55), 9)) & 0x7FF; + + r0 = Unquantize(r0, 11, signed); + g0 = Unquantize(g0, 11, signed); + b0 = Unquantize(b0, 11, signed); + + r1 = Unquantize(r1, 11, signed); + g1 = Unquantize(g1, 11, signed); + b1 = Unquantize(b1, 11, signed); + + subsetCount = 1; + break; + case 10: + r0 = (int)(((low >> 29) & 0x400) | ((low >> 5) & 0x3FF)); + g0 = (int)(((low >> 39) & 0x400) | ((low >> 15) & 0x3FF)); + b0 = (int)(((low >> 50) & 0x400) | ((low >> 25) & 0x3FF)); + + if (signed) + { + r0 = SignExtend(r0, 11); + g0 = SignExtend(g0, 11); + b0 = SignExtend(b0, 11); + } + + r1 = r0 + SignExtend((int)(low >> 35), 4); + g1 = g0 + SignExtend((int)(low >> 45), 4); + b1 = b0 + SignExtend((int)(low >> 55), 5); + + r2 = r0 + SignExtend((int)(high >> 1), 4); + g2 = g0 + SignExtend((int)(low >> 41), 4); + b2 = b0 + SignExtend((int)(((low >> 36) & 0x10) | ((high << 3) & 8) | (low >> 61)), 5); + + r3 = r0 + SignExtend((int)(high >> 7), 4); + g3 = g0 + SignExtend((int)(low >> 51), 4); + b3 = b0 + SignExtend((int)( + ((high >> 7) & 0x10) | + ((high >> 9) & 0x08) | + ((high >> 4) & 0x06) | + ((low >> 50) & 0x01)), 5); + + r0 = Unquantize(r0, 11, signed); + g0 = Unquantize(g0, 11, signed); + b0 = Unquantize(b0, 11, signed); + + r1 = Unquantize(r1 & 0x7FF, 11, signed); + g1 = Unquantize(g1 & 0x7FF, 11, signed); + b1 = Unquantize(b1 & 0x7FF, 11, signed); + + r2 = Unquantize(r2 & 0x7FF, 11, signed); + g2 = Unquantize(g2 & 0x7FF, 11, signed); + b2 = Unquantize(b2 & 0x7FF, 11, signed); + + r3 = Unquantize(r3 & 0x7FF, 11, signed); + g3 = Unquantize(g3 & 
0x7FF, 11, signed); + b3 = Unquantize(b3 & 0x7FF, 11, signed); + + subsetCount = 2; + break; + case 11: + r0 = (int)(((low >> 32) & 0x800) | ((low >> 34) & 0x400) | ((low >> 5) & 0x3FF)); + g0 = (int)(((low >> 42) & 0x800) | ((low >> 44) & 0x400) | ((low >> 15) & 0x3FF)); + b0 = (int)(((low >> 52) & 0x800) | ((high << 10) & 0x400) | ((low >> 25) & 0x3FF)); + + if (signed) + { + r0 = SignExtend(r0, 12); + g0 = SignExtend(g0, 12); + b0 = SignExtend(b0, 12); + } + + r1 = (r0 + SignExtend((int)(low >> 35), 8)) & 0xFFF; + g1 = (g0 + SignExtend((int)(low >> 45), 8)) & 0xFFF; + b1 = (b0 + SignExtend((int)(low >> 55), 8)) & 0xFFF; + + r0 = Unquantize(r0, 12, signed); + g0 = Unquantize(g0, 12, signed); + b0 = Unquantize(b0, 12, signed); + + r1 = Unquantize(r1, 12, signed); + g1 = Unquantize(g1, 12, signed); + b1 = Unquantize(b1, 12, signed); + + subsetCount = 1; + break; + case 14: + r0 = (int)(low >> 5) & 0x1FF; + g0 = (int)(low >> 15) & 0x1FF; + b0 = (int)(low >> 25) & 0x1FF; + + if (signed) + { + r0 = SignExtend(r0, 9); + g0 = SignExtend(g0, 9); + b0 = SignExtend(b0, 9); + } + + r1 = r0 + SignExtend((int)(low >> 35), 5); + g1 = g0 + SignExtend((int)(low >> 45), 5); + b1 = b0 + SignExtend((int)(low >> 55), 5); + + r2 = r0 + SignExtend((int)(high >> 1), 5); + g2 = g0 + SignExtend((int)(((low >> 20) & 0x10) | ((low >> 41) & 0xF)), 5); + b2 = b0 + SignExtend((int)(((low >> 10) & 0x10) | ((high << 3) & 8) | (low >> 61)), 5); + + r3 = r0 + SignExtend((int)(high >> 7), 5); + g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0xF)), 5); + b3 = b0 + SignExtend((int)( + ((low >> 30) & 0x10) | + ((high >> 9) & 0x08) | + ((high >> 4) & 0x04) | + ((low >> 59) & 0x02) | + ((low >> 50) & 0x01)), 5); + + r0 = Unquantize(r0, 9, signed); + g0 = Unquantize(g0, 9, signed); + b0 = Unquantize(b0, 9, signed); + + r1 = Unquantize(r1 & 0x1FF, 9, signed); + g1 = Unquantize(g1 & 0x1FF, 9, signed); + b1 = Unquantize(b1 & 0x1FF, 9, signed); + + r2 = Unquantize(r2 & 0x1FF, 9, signed); + 
g2 = Unquantize(g2 & 0x1FF, 9, signed); + b2 = Unquantize(b2 & 0x1FF, 9, signed); + + r3 = Unquantize(r3 & 0x1FF, 9, signed); + g3 = Unquantize(g3 & 0x1FF, 9, signed); + b3 = Unquantize(b3 & 0x1FF, 9, signed); + + subsetCount = 2; + break; + case 15: + r0 = (BitReverse6((int)(low >> 39) & 0x3F) << 10) | ((int)(low >> 5) & 0x3FF); + g0 = (BitReverse6((int)(low >> 49) & 0x3F) << 10) | ((int)(low >> 15) & 0x3FF); + b0 = ((BitReverse6((int)(low >> 59)) | (int)(high & 1)) << 10) | ((int)(low >> 25) & 0x3FF); + + if (signed) + { + r0 = SignExtend(r0, 16); + g0 = SignExtend(g0, 16); + b0 = SignExtend(b0, 16); + } + + r1 = (r0 + SignExtend((int)(low >> 35), 4)) & 0xFFFF; + g1 = (g0 + SignExtend((int)(low >> 45), 4)) & 0xFFFF; + b1 = (b0 + SignExtend((int)(low >> 55), 4)) & 0xFFFF; + + subsetCount = 1; + break; + case 18: + r0 = (int)(low >> 5) & 0xFF; + g0 = (int)(low >> 15) & 0xFF; + b0 = (int)(low >> 25) & 0xFF; + + if (signed) + { + r0 = SignExtend(r0, 8); + g0 = SignExtend(g0, 8); + b0 = SignExtend(b0, 8); + } + + r1 = r0 + SignExtend((int)(low >> 35), 6); + g1 = g0 + SignExtend((int)(low >> 45), 5); + b1 = b0 + SignExtend((int)(low >> 55), 5); + + r2 = r0 + SignExtend((int)(high >> 1), 6); + g2 = g0 + SignExtend((int)(((low >> 20) & 0x10) | ((low >> 41) & 0xF)), 5); + b2 = b0 + SignExtend((int)(((low >> 10) & 0x10) | ((high << 3) & 8) | (low >> 61)), 5); + + r3 = r0 + SignExtend((int)(high >> 7), 6); + g3 = g0 + SignExtend((int)(((low >> 9) & 0x10) | ((low >> 51) & 0xF)), 5); + b3 = b0 + SignExtend((int)( + ((low >> 30) & 0x18) | + ((low >> 21) & 0x04) | + ((low >> 59) & 0x02) | + ((low >> 50) & 0x01)), 5); + + r0 = Unquantize(r0, 8, signed); + g0 = Unquantize(g0, 8, signed); + b0 = Unquantize(b0, 8, signed); + + r1 = Unquantize(r1 & 0xFF, 8, signed); + g1 = Unquantize(g1 & 0xFF, 8, signed); + b1 = Unquantize(b1 & 0xFF, 8, signed); + + r2 = Unquantize(r2 & 0xFF, 8, signed); + g2 = Unquantize(g2 & 0xFF, 8, signed); + b2 = Unquantize(b2 & 0xFF, 8, signed); + + r3 = 
Unquantize(r3 & 0xFF, 8, signed); + g3 = Unquantize(g3 & 0xFF, 8, signed); + b3 = Unquantize(b3 & 0xFF, 8, signed); + + subsetCount = 2; + break; + case 22: + r0 = (int)(low >> 5) & 0xFF; + g0 = (int)(low >> 15) & 0xFF; + b0 = (int)(low >> 25) & 0xFF; + + if (signed) + { + r0 = SignExtend(r0, 8); + g0 = SignExtend(g0, 8); + b0 = SignExtend(b0, 8); + } + + r1 = r0 + SignExtend((int)(low >> 35), 5); + g1 = g0 + SignExtend((int)(low >> 45), 6); + b1 = b0 + SignExtend((int)(low >> 55), 5); + + r2 = r0 + SignExtend((int)(high >> 1), 5); + g2 = g0 + SignExtend((int)(((low >> 18) & 0x20) | ((low >> 20) & 0x10) | ((low >> 41) & 0xF)), 6); + b2 = b0 + SignExtend((int)(((low >> 10) & 0x10) | ((high << 3) & 0x08) | (low >> 61)), 5); + + r3 = r0 + SignExtend((int)(high >> 7), 5); + g3 = g0 + SignExtend((int)(((low >> 28) & 0x20) | ((low >> 36) & 0x10) | ((low >> 51) & 0x0F)), 6); + b3 = b0 + SignExtend((int)( + ((low >> 30) & 0x10) | + ((high >> 9) & 0x08) | + ((high >> 4) & 0x04) | + ((low >> 59) & 0x02) | + ((low >> 13) & 0x01)), 5); + + r0 = Unquantize(r0, 8, signed); + g0 = Unquantize(g0, 8, signed); + b0 = Unquantize(b0, 8, signed); + + r1 = Unquantize(r1 & 0xFF, 8, signed); + g1 = Unquantize(g1 & 0xFF, 8, signed); + b1 = Unquantize(b1 & 0xFF, 8, signed); + + r2 = Unquantize(r2 & 0xFF, 8, signed); + g2 = Unquantize(g2 & 0xFF, 8, signed); + b2 = Unquantize(b2 & 0xFF, 8, signed); + + r3 = Unquantize(r3 & 0xFF, 8, signed); + g3 = Unquantize(g3 & 0xFF, 8, signed); + b3 = Unquantize(b3 & 0xFF, 8, signed); + + subsetCount = 2; + break; + case 26: + r0 = (int)(low >> 5) & 0xFF; + g0 = (int)(low >> 15) & 0xFF; + b0 = (int)(low >> 25) & 0xFF; + + if (signed) + { + r0 = SignExtend(r0, 8); + g0 = SignExtend(g0, 8); + b0 = SignExtend(b0, 8); + } + + r1 = r0 + SignExtend((int)(low >> 35), 5); + g1 = g0 + SignExtend((int)(low >> 45), 5); + b1 = b0 + SignExtend((int)(low >> 55), 6); + + r2 = r0 + SignExtend((int)(high >> 1), 5); + g2 = g0 + SignExtend((int)(((low >> 20) & 0x10) | ((low 
>> 41) & 0xF)), 5); + b2 = b0 + SignExtend((int)( + ((low >> 18) & 0x20) | + ((low >> 10) & 0x10) | + ((high << 3) & 0x08) | + (low >> 61)), 6); + + r3 = r0 + SignExtend((int)(high >> 7), 5); + g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0xF)), 5); + b3 = b0 + SignExtend((int)( + ((low >> 28) & 0x20) | + ((low >> 30) & 0x10) | + ((high >> 9) & 0x08) | + ((high >> 4) & 0x04) | + ((low >> 12) & 0x02) | + ((low >> 50) & 0x01)), 6); + + r0 = Unquantize(r0, 8, signed); + g0 = Unquantize(g0, 8, signed); + b0 = Unquantize(b0, 8, signed); + + r1 = Unquantize(r1 & 0xFF, 8, signed); + g1 = Unquantize(g1 & 0xFF, 8, signed); + b1 = Unquantize(b1 & 0xFF, 8, signed); + + r2 = Unquantize(r2 & 0xFF, 8, signed); + g2 = Unquantize(g2 & 0xFF, 8, signed); + b2 = Unquantize(b2 & 0xFF, 8, signed); + + r3 = Unquantize(r3 & 0xFF, 8, signed); + g3 = Unquantize(g3 & 0xFF, 8, signed); + b3 = Unquantize(b3 & 0xFF, 8, signed); + + subsetCount = 2; + break; + case 30: + r0 = (int)(low >> 5) & 0x3F; + g0 = (int)(low >> 15) & 0x3F; + b0 = (int)(low >> 25) & 0x3F; + + r1 = (int)(low >> 35) & 0x3F; + g1 = (int)(low >> 45) & 0x3F; + b1 = (int)(low >> 55) & 0x3F; + + r2 = (int)(high >> 1) & 0x3F; + g2 = (int)(((low >> 16) & 0x20) | ((low >> 20) & 0x10) | ((low >> 41) & 0xF)); + b2 = (int)(((low >> 17) & 0x20) | ((low >> 10) & 0x10) | ((high << 3) & 0x08) | (low >> 61)); + + r3 = (int)(high >> 7) & 0x3F; + g3 = (int)(((low >> 26) & 0x20) | ((low >> 7) & 0x10) | ((low >> 51) & 0xF)); + b3 = (int)( + ((low >> 28) & 0x20) | + ((low >> 30) & 0x10) | + ((low >> 29) & 0x08) | + ((low >> 21) & 0x04) | + ((low >> 12) & 0x03)); + + if (signed) + { + r0 = SignExtend(r0, 6); + g0 = SignExtend(g0, 6); + b0 = SignExtend(b0, 6); + + r1 = SignExtend(r1, 6); + g1 = SignExtend(g1, 6); + b1 = SignExtend(b1, 6); + + r2 = SignExtend(r2, 6); + g2 = SignExtend(g2, 6); + b2 = SignExtend(b2, 6); + + r3 = SignExtend(r3, 6); + g3 = SignExtend(g3, 6); + b3 = SignExtend(b3, 6); + } + + r0 = Unquantize(r0, 6, 
signed); + g0 = Unquantize(g0, 6, signed); + b0 = Unquantize(b0, 6, signed); + + r1 = Unquantize(r1, 6, signed); + g1 = Unquantize(g1, 6, signed); + b1 = Unquantize(b1, 6, signed); + + r2 = Unquantize(r2, 6, signed); + g2 = Unquantize(g2, 6, signed); + b2 = Unquantize(b2, 6, signed); + + r3 = Unquantize(r3, 6, signed); + g3 = Unquantize(g3, 6, signed); + b3 = Unquantize(b3, 6, signed); + + subsetCount = 2; + break; + default: + subsetCount = 0; + break; + } + + if (subsetCount > 0) + { + endPoints[0] = new RgbaColor32(r0, g0, b0, HalfOne); + endPoints[1] = new RgbaColor32(r1, g1, b1, HalfOne); + + if (subsetCount > 1) + { + endPoints[2] = new RgbaColor32(r2, g2, b2, HalfOne); + endPoints[3] = new RgbaColor32(r3, g3, b3, HalfOne); + } + } + + return subsetCount; + } + + private static int SignExtend(int value, int bits) + { + int shift = 32 - bits; + return (value << shift) >> shift; + } + + private static int Unquantize(int value, int bits, bool signed) + { + if (signed) + { + if (bits >= 16) + { + return value; + } + else + { + bool sign = value < 0; + + if (sign) + { + value = -value; + } + + if (value == 0) + { + return value; + } + else if (value >= ((1 << (bits - 1)) - 1)) + { + value = 0x7FFF; + } + else + { + value = ((value << 15) + 0x4000) >> (bits - 1); + } + + if (sign) + { + value = -value; + } + } + } + else + { + if (bits >= 15 || value == 0) + { + return value; + } + else if (value == ((1 << bits) - 1)) + { + return 0xFFFF; + } + else + { + return ((value << 16) + 0x8000) >> bits; + } + } + + return value; + } + + private static ushort FinishUnquantize(int value, bool signed) + { + if (signed) + { + value = value < 0 ? 
-((-value * 31) >> 5) : (value * 31) >> 5; + + int sign = 0; + if (value < 0) + { + sign = 0x8000; + value = -value; + } + + return (ushort)(sign | value); + } + else + { + return (ushort)((value * 31) >> 6); + } + } + + private static int BitReverse6(int value) + { + value = ((value >> 1) & 0x55) | ((value << 1) & 0xaa); + value = ((value >> 2) & 0x33) | ((value << 2) & 0xcc); + value = ((value >> 4) & 0x0f) | ((value << 4) & 0xf0); + return value >> 2; + } + } +} diff --git a/Ryujinx.Graphics.Texture/BC7Decoder.cs b/Ryujinx.Graphics.Texture/BC7Decoder.cs new file mode 100644 index 000000000..060d1ab85 --- /dev/null +++ b/Ryujinx.Graphics.Texture/BC7Decoder.cs @@ -0,0 +1,220 @@ +using Ryujinx.Graphics.Texture.Utils; +using System.Diagnostics; +using System; +using System.Numerics; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Texture +{ + static class BC7Decoder + { + public static void Decode(Span output, ReadOnlySpan data, int width, int height) + { + ReadOnlySpan blocks = MemoryMarshal.Cast(data); + + Span output32 = MemoryMarshal.Cast(output); + + int wInBlocks = (width + 3) / 4; + int hInBlocks = (height + 3) / 4; + + for (int y = 0; y < hInBlocks; y++) + { + int y2 = y * 4; + int bh = Math.Min(4, height - y2); + + for (int x = 0; x < wInBlocks; x++) + { + int x2 = x * 4; + int bw = Math.Min(4, width - x2); + + DecodeBlock(blocks[y * wInBlocks + x], output32.Slice(y2 * width + x2), bw, bh, width); + } + } + } + + private static void DecodeBlock(Block block, Span output, int w, int h, int width) + { + int mode = BitOperations.TrailingZeroCount((byte)block.Low | 0x100); + if (mode == 8) + { + // Mode is invalid, the spec mandates that hardware fills the block with + // a transparent black color. 
+ for (int ty = 0; ty < h; ty++) + { + int baseOffs = ty * width; + + for (int tx = 0; tx < w; tx++) + { + int offs = baseOffs + tx; + + output[offs] = 0; + } + } + + return; + } + + BC7ModeInfo modeInfo = BC67Tables.BC7ModeInfos[mode]; + + int offset = mode + 1; + int partition = (int)block.Decode(ref offset, modeInfo.PartitionBitCount); + int rotation = (int)block.Decode(ref offset, modeInfo.RotationBitCount); + int indexMode = (int)block.Decode(ref offset, modeInfo.IndexModeBitCount); + + Debug.Assert(partition < 64); + Debug.Assert(rotation < 4); + Debug.Assert(indexMode < 2); + + int endPointCount = modeInfo.SubsetCount * 2; + + Span endPoints = stackalloc RgbaColor32[endPointCount]; + Span pValues = stackalloc byte[modeInfo.PBits]; + + endPoints.Fill(new RgbaColor32(0, 0, 0, 255)); + + for (int i = 0; i < endPointCount; i++) + { + endPoints[i].R = (int)block.Decode(ref offset, modeInfo.ColorDepth); + } + + for (int i = 0; i < endPointCount; i++) + { + endPoints[i].G = (int)block.Decode(ref offset, modeInfo.ColorDepth); + } + + for (int i = 0; i < endPointCount; i++) + { + endPoints[i].B = (int)block.Decode(ref offset, modeInfo.ColorDepth); + } + + if (modeInfo.AlphaDepth != 0) + { + for (int i = 0; i < endPointCount; i++) + { + endPoints[i].A = (int)block.Decode(ref offset, modeInfo.AlphaDepth); + } + } + + for (int i = 0; i < modeInfo.PBits; i++) + { + pValues[i] = (byte)block.Decode(ref offset, 1); + } + + for (int i = 0; i < endPointCount; i++) + { + int pBit = -1; + + if (modeInfo.PBits != 0) + { + int pIndex = (i * modeInfo.PBits) / endPointCount; + pBit = pValues[pIndex]; + } + + Unquantize(ref endPoints[i], modeInfo.ColorDepth, modeInfo.AlphaDepth, pBit); + } + + byte[] partitionTable = BC67Tables.PartitionTable[modeInfo.SubsetCount - 1][partition]; + byte[] fixUpTable = BC67Tables.FixUpIndices[modeInfo.SubsetCount - 1][partition]; + + Span colorIndices = stackalloc byte[16]; + + for (int i = 0; i < 16; i++) + { + byte subset = partitionTable[i]; + int 
bitCount = i == fixUpTable[subset] ? modeInfo.ColorIndexBitCount - 1 : modeInfo.ColorIndexBitCount; + + colorIndices[i] = (byte)block.Decode(ref offset, bitCount); + Debug.Assert(colorIndices[i] < 16); + } + + Span alphaIndices = stackalloc byte[16]; + + if (modeInfo.AlphaIndexBitCount != 0) + { + for (int i = 0; i < 16; i++) + { + int bitCount = i != 0 ? modeInfo.AlphaIndexBitCount : modeInfo.AlphaIndexBitCount - 1; + + alphaIndices[i] = (byte)block.Decode(ref offset, bitCount); + Debug.Assert(alphaIndices[i] < 16); + } + } + + for (int ty = 0; ty < h; ty++) + { + int baseOffs = ty * width; + + for (int tx = 0; tx < w; tx++) + { + int i = ty * 4 + tx; + + RgbaColor32 color; + + byte subset = partitionTable[i]; + + RgbaColor32 color1 = endPoints[subset * 2]; + RgbaColor32 color2 = endPoints[subset * 2 + 1]; + + if (modeInfo.AlphaIndexBitCount != 0) + { + if (indexMode == 0) + { + color = BC67Utils.Interpolate(color1, color2, colorIndices[i], alphaIndices[i], modeInfo.ColorIndexBitCount, modeInfo.AlphaIndexBitCount); + } + else + { + color = BC67Utils.Interpolate(color1, color2, alphaIndices[i], colorIndices[i], modeInfo.AlphaIndexBitCount, modeInfo.ColorIndexBitCount); + } + } + else + { + color = BC67Utils.Interpolate(color1, color2, colorIndices[i], colorIndices[i], modeInfo.ColorIndexBitCount, modeInfo.ColorIndexBitCount); + } + + if (rotation != 0) + { + int a = color.A; + + switch (rotation) + { + case 1: color.A = color.R; color.R = a; break; + case 2: color.A = color.G; color.G = a; break; + case 3: color.A = color.B; color.B = a; break; + } + } + + RgbaColor8 color8 = color.GetColor8(); + + output[baseOffs + tx] = color8.ToUInt32(); + } + } + } + + private static void Unquantize(ref RgbaColor32 color, int colorDepth, int alphaDepth, int pBit) + { + color.R = UnquantizeComponent(color.R, colorDepth, pBit); + color.G = UnquantizeComponent(color.G, colorDepth, pBit); + color.B = UnquantizeComponent(color.B, colorDepth, pBit); + color.A = alphaDepth != 0 ? 
UnquantizeComponent(color.A, alphaDepth, pBit) : 255; + } + + private static int UnquantizeComponent(int component, int bits, int pBit) + { + int shift = 8 - bits; + int value = component << shift; + + if (pBit >= 0) + { + Debug.Assert(pBit <= 1); + value |= value >> (bits + 1); + value |= pBit << (shift - 1); + } + else + { + value |= value >> bits; + } + + return value; + } + } +} diff --git a/Ryujinx.Graphics.Texture/BCnDecoder.cs b/Ryujinx.Graphics.Texture/BCnDecoder.cs index b840cac89..053f6a735 100644 --- a/Ryujinx.Graphics.Texture/BCnDecoder.cs +++ b/Ryujinx.Graphics.Texture/BCnDecoder.cs @@ -515,6 +515,82 @@ namespace Ryujinx.Graphics.Texture return output; } + public static byte[] DecodeBC6(ReadOnlySpan data, int width, int height, int depth, int levels, int layers, bool signed) + { + int size = 0; + + for (int l = 0; l < levels; l++) + { + size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 8; + } + + byte[] output = new byte[size]; + + int inputOffset = 0; + int outputOffset = 0; + + for (int l = 0; l < levels; l++) + { + int w = BitUtils.DivRoundUp(width, BlockWidth); + int h = BitUtils.DivRoundUp(height, BlockHeight); + + for (int l2 = 0; l2 < layers; l2++) + { + for (int z = 0; z < depth; z++) + { + BC6Decoder.Decode(output.AsSpan().Slice(outputOffset), data.Slice(inputOffset), width, height, signed); + + inputOffset += w * h * 16; + outputOffset += width * height * 8; + } + } + + width = Math.Max(1, width >> 1); + height = Math.Max(1, height >> 1); + depth = Math.Max(1, depth >> 1); + } + + return output; + } + + public static byte[] DecodeBC7(ReadOnlySpan data, int width, int height, int depth, int levels, int layers) + { + int size = 0; + + for (int l = 0; l < levels; l++) + { + size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 4; + } + + byte[] output = new byte[size]; + + int inputOffset = 0; + int outputOffset = 0; + + for (int l = 0; l < levels; l++) + { + 
int w = BitUtils.DivRoundUp(width, BlockWidth); + int h = BitUtils.DivRoundUp(height, BlockHeight); + + for (int l2 = 0; l2 < layers; l2++) + { + for (int z = 0; z < depth; z++) + { + BC7Decoder.Decode(output.AsSpan().Slice(outputOffset), data.Slice(inputOffset), width, height); + + inputOffset += w * h * 16; + outputOffset += width * height * 4; + } + } + + width = Math.Max(1, width >> 1); + height = Math.Max(1, height >> 1); + depth = Math.Max(1, depth >> 1); + } + + return output; + } + private static ulong InterleaveBytes(uint left, uint right) { return InterleaveBytesWithZeros(left) | (InterleaveBytesWithZeros(right) << 8); diff --git a/Ryujinx.Graphics.Texture/Encoders/BC7Encoder.cs b/Ryujinx.Graphics.Texture/Encoders/BC7Encoder.cs index 896f70468..a69c10548 100644 --- a/Ryujinx.Graphics.Texture/Encoders/BC7Encoder.cs +++ b/Ryujinx.Graphics.Texture/Encoders/BC7Encoder.cs @@ -1,4 +1,5 @@ -using System; +using Ryujinx.Graphics.Texture.Utils; +using System; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -10,53 +11,6 @@ namespace Ryujinx.Graphics.Texture.Encoders { static class BC7Encoder { - private struct ModeInfo - { - public readonly int SubsetCount; - public readonly int PartitionBitCount; - public readonly int PBits; - public readonly int RotationBitCount; - public readonly int IndexModeBitCount; - public readonly int ColorIndexBitCount; - public readonly int AlphaIndexBitCount; - public readonly int ColorDepth; - public readonly int AlphaDepth; - - public ModeInfo( - int subsetCount, - int partitionBitsCount, - int pBits, - int rotationBitCount, - int indexModeBitCount, - int colorIndexBitCount, - int alphaIndexBitCount, - int colorDepth, - int alphaDepth) - { - SubsetCount = subsetCount; - PartitionBitCount = partitionBitsCount; - PBits = pBits; - RotationBitCount = rotationBitCount; - IndexModeBitCount = indexModeBitCount; - ColorIndexBitCount = colorIndexBitCount; - AlphaIndexBitCount = 
alphaIndexBitCount; - ColorDepth = colorDepth; - AlphaDepth = alphaDepth; - } - } - - private static readonly ModeInfo[] _modeInfos = new ModeInfo[] - { - new ModeInfo(3, 4, 6, 0, 0, 3, 0, 4, 0), - new ModeInfo(2, 6, 2, 0, 0, 3, 0, 6, 0), - new ModeInfo(3, 6, 0, 0, 0, 2, 0, 5, 0), - new ModeInfo(2, 6, 4, 0, 0, 2, 0, 7, 0), - new ModeInfo(1, 0, 0, 2, 1, 2, 3, 5, 6), - new ModeInfo(1, 0, 0, 2, 0, 2, 2, 7, 8), - new ModeInfo(1, 0, 2, 0, 0, 4, 0, 7, 7), - new ModeInfo(2, 6, 4, 0, 0, 2, 0, 5, 5) - }; - public static void Encode(Memory outputStorage, ReadOnlyMemory data, int width, int height, EncodeMode mode) { int widthInBlocks = (width + 3) / 4; @@ -105,32 +59,6 @@ namespace Ryujinx.Graphics.Texture.Encoders 0, 13, 2, 1, 15, 14, 10, 23 }; - private struct Block - { - public ulong Low; - public ulong High; - - public void Encode(ulong value, ref int offset, int bits) - { - if (offset >= 64) - { - High |= value << (offset - 64); - } - else - { - Low |= value << offset; - - if (offset + bits > 64) - { - int remainder = 64 - offset; - High |= value >> remainder; - } - } - - offset += bits; - } - } - private static Block CompressBlock(ReadOnlySpan data, int x, int y, int width, int height, bool fastMode) { int w = Math.Min(4, width - x); @@ -157,10 +85,10 @@ namespace Ryujinx.Graphics.Texture.Encoders private static Block EncodeFast(ReadOnlySpan tile, int w, int h) { - (RgbaColor8 minColor, RgbaColor8 maxColor) = BC7Utils.GetMinMaxColors(tile, w, h); + (RgbaColor8 minColor, RgbaColor8 maxColor) = BC67Utils.GetMinMaxColors(tile, w, h); bool alphaNotOne = minColor.A != 255 || maxColor.A != 255; - int variance = BC7Utils.SquaredDifference(minColor.GetColor32(), maxColor.GetColor32()); + int variance = BC67Utils.SquaredDifference(minColor.GetColor32(), maxColor.GetColor32()); int selectedMode; int indexMode = 0; @@ -253,13 +181,13 @@ namespace Ryujinx.Graphics.Texture.Encoders { for (int im = 0; im < (m == 4 ? 
2 : 1); im++) { - for (int p = 0; p < 1 << _modeInfos[m].PartitionBitCount; p++) + for (int p = 0; p < 1 << BC67Tables.BC7ModeInfos[m].PartitionBitCount; p++) { Block block = Encode(m, p, r, im, fastMode: false, tile, w, h, out int maxError); - if (maxError < lowestError || (maxError == lowestError && _modeInfos[m].SubsetCount < lowestErrorSubsets)) + if (maxError < lowestError || (maxError == lowestError && BC67Tables.BC7ModeInfos[m].SubsetCount < lowestErrorSubsets)) { lowestError = maxError; - lowestErrorSubsets = _modeInfos[m].SubsetCount; + lowestErrorSubsets = BC67Tables.BC7ModeInfos[m].SubsetCount; bestBlock = block; } } @@ -281,7 +209,7 @@ namespace Ryujinx.Graphics.Texture.Encoders int h, out int errorSum) { - ModeInfo modeInfo = _modeInfos[mode]; + BC7ModeInfo modeInfo = BC67Tables.BC7ModeInfos[mode]; int subsetCount = modeInfo.SubsetCount; int partitionBitCount = modeInfo.PartitionBitCount; int rotationBitCount = modeInfo.RotationBitCount; @@ -379,7 +307,7 @@ namespace Ryujinx.Graphics.Texture.Encoders Span colorIndices = stackalloc byte[16]; Span alphaIndices = stackalloc byte[16]; - errorSum = BC7Utils.SelectIndices( + errorSum = BC67Utils.SelectIndices( tile, w, h, @@ -398,7 +326,7 @@ namespace Ryujinx.Graphics.Texture.Encoders if (separateAlphaIndices) { - errorSum += BC7Utils.SelectIndices( + errorSum += BC67Utils.SelectIndices( tile, w, h, @@ -420,7 +348,7 @@ namespace Ryujinx.Graphics.Texture.Encoders for (int i = 0; i < 3; i++) { - colorSwapSubset[i] = colorIndices[BC7Tables.FixUpIndices[subsetCount - 1][partition][i]] >= (colorIndexCount >> 1); + colorSwapSubset[i] = colorIndices[BC67Tables.FixUpIndices[subsetCount - 1][partition][i]] >= (colorIndexCount >> 1); } bool alphaSwapSubset = alphaIndices[0] >= (alphaIndexCount >> 1); @@ -462,13 +390,13 @@ namespace Ryujinx.Graphics.Texture.Encoders if (indexMode == 0 ? 
colorSwapSubset[subset] : alphaSwapSubset) { - block.Encode(BC7Utils.QuantizeComponent(color1.GetComponent(rotatedComp), colorDepth, pBit1), ref offset, colorDepth); - block.Encode(BC7Utils.QuantizeComponent(color0.GetComponent(rotatedComp), colorDepth, pBit0), ref offset, colorDepth); + block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), colorDepth, pBit1), ref offset, colorDepth); + block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), colorDepth, pBit0), ref offset, colorDepth); } else { - block.Encode(BC7Utils.QuantizeComponent(color0.GetComponent(rotatedComp), colorDepth, pBit0), ref offset, colorDepth); - block.Encode(BC7Utils.QuantizeComponent(color1.GetComponent(rotatedComp), colorDepth, pBit1), ref offset, colorDepth); + block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), colorDepth, pBit0), ref offset, colorDepth); + block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), colorDepth, pBit1), ref offset, colorDepth); } } } @@ -496,13 +424,13 @@ namespace Ryujinx.Graphics.Texture.Encoders if (separateAlphaIndices && indexMode == 0 ? 
alphaSwapSubset : colorSwapSubset[subset]) { - block.Encode(BC7Utils.QuantizeComponent(color1.GetComponent(rotatedComp), alphaDepth, pBit1), ref offset, alphaDepth); - block.Encode(BC7Utils.QuantizeComponent(color0.GetComponent(rotatedComp), alphaDepth, pBit0), ref offset, alphaDepth); + block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), alphaDepth, pBit1), ref offset, alphaDepth); + block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), alphaDepth, pBit0), ref offset, alphaDepth); } else { - block.Encode(BC7Utils.QuantizeComponent(color0.GetComponent(rotatedComp), alphaDepth, pBit0), ref offset, alphaDepth); - block.Encode(BC7Utils.QuantizeComponent(color1.GetComponent(rotatedComp), alphaDepth, pBit1), ref offset, alphaDepth); + block.Encode(BC67Utils.QuantizeComponent(color0.GetComponent(rotatedComp), alphaDepth, pBit0), ref offset, alphaDepth); + block.Encode(BC67Utils.QuantizeComponent(color1.GetComponent(rotatedComp), alphaDepth, pBit1), ref offset, alphaDepth); } } } @@ -512,11 +440,11 @@ namespace Ryujinx.Graphics.Texture.Encoders block.Encode((ulong)pBitValues[i], ref offset, 1); } - byte[] fixUpTable = BC7Tables.FixUpIndices[subsetCount - 1][partition]; + byte[] fixUpTable = BC67Tables.FixUpIndices[subsetCount - 1][partition]; for (int i = 0; i < 16; i++) { - int subset = BC7Tables.PartitionTable[subsetCount - 1][partition][i]; + int subset = BC67Tables.PartitionTable[subsetCount - 1][partition][i]; byte index = colorIndices[i]; if (colorSwapSubset[subset]) @@ -561,12 +489,12 @@ namespace Ryujinx.Graphics.Texture.Encoders private static unsafe int GetEndPointSelectionErrorFast(ReadOnlySpan tile, int subsetCount, int partition, int w, int h, int maxError) { - byte[] partitionTable = BC7Tables.PartitionTable[subsetCount - 1][partition]; + byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; Span minColors = stackalloc RgbaColor8[subsetCount]; Span maxColors = stackalloc 
RgbaColor8[subsetCount]; - BC7Utils.GetMinMaxColors(partitionTable, tile, w, h, minColors, maxColors, subsetCount); + BC67Utils.GetMinMaxColors(partitionTable, tile, w, h, minColors, maxColors, subsetCount); Span endPoints0 = stackalloc uint[subsetCount]; Span endPoints1 = stackalloc uint[subsetCount]; @@ -592,8 +520,8 @@ namespace Ryujinx.Graphics.Texture.Encoders int pBit0 = GetPBit(c0, 6, 0); int pBit1 = GetPBit(c1, 6, 0); - c0 = BC7Utils.Quantize(RgbaColor8.FromUInt32(c0), 6, 0, pBit0).ToUInt32(); - c1 = BC7Utils.Quantize(RgbaColor8.FromUInt32(c1), 6, 0, pBit1).ToUInt32(); + c0 = BC67Utils.Quantize(RgbaColor8.FromUInt32(c0), 6, 0, pBit0).ToUInt32(); + c1 = BC67Utils.Quantize(RgbaColor8.FromUInt32(c1), 6, 0, pBit1).ToUInt32(); if (Sse41.IsSupported) { @@ -605,7 +533,7 @@ namespace Ryujinx.Graphics.Texture.Encoders Vector128 rWeights; Vector128 lWeights; - fixed (byte* pWeights = BC7Tables.Weights[1], pInvWeights = BC7Tables.InverseWeights[1]) + fixed (byte* pWeights = BC67Tables.Weights[1], pInvWeights = BC67Tables.InverseWeights[1]) { rWeights = Sse2.LoadScalarVector128((ulong*)pWeights).AsByte(); lWeights = Sse2.LoadScalarVector128((ulong*)pInvWeights).AsByte(); @@ -670,7 +598,7 @@ namespace Ryujinx.Graphics.Texture.Encoders for (int i = 1; i < palette.Length - 1; i++) { - palette[i] = BC7Utils.Interpolate(e032, e132, i, 3); + palette[i] = BC67Utils.Interpolate(e032, e132, i, 3); } for (int i = 0; i < tile.Length; i++) @@ -687,7 +615,7 @@ namespace Ryujinx.Graphics.Texture.Encoders for (int j = 0; j < palette.Length; j++) { - int score = BC7Utils.SquaredDifference(color, palette[j]); + int score = BC67Utils.SquaredDifference(color, palette[j]); if (score < bestMatchScore) { @@ -723,12 +651,12 @@ namespace Ryujinx.Graphics.Texture.Encoders uint writeMask, bool fastMode) { - byte[] partitionTable = BC7Tables.PartitionTable[subsetCount - 1][partition]; + byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; Span minColors = stackalloc 
RgbaColor8[subsetCount]; Span maxColors = stackalloc RgbaColor8[subsetCount]; - BC7Utils.GetMinMaxColors(partitionTable, tile, w, h, minColors, maxColors, subsetCount); + BC67Utils.GetMinMaxColors(partitionTable, tile, w, h, minColors, maxColors, subsetCount); uint inverseMask = ~writeMask; @@ -934,8 +862,8 @@ namespace Ryujinx.Graphics.Texture.Encoders return (default, default); } - minValue = BC7Utils.Quantize(minValue, colorDepth, alphaDepth); - maxValue = BC7Utils.Quantize(maxValue, colorDepth, alphaDepth); + minValue = BC67Utils.Quantize(minValue, colorDepth, alphaDepth); + maxValue = BC67Utils.Quantize(maxValue, colorDepth, alphaDepth); RgbaColor32 blockDir = maxValue.GetColor32() - minValue.GetColor32(); blockDir = RgbaColor32.DivideGuarded(blockDir << 6, new RgbaColor32(blockDir.R + blockDir.G + blockDir.B + blockDir.A), 0); @@ -946,7 +874,7 @@ namespace Ryujinx.Graphics.Texture.Encoders for (int i = 0; i < values.Length; i++) { RgbaColor8 color = values[i]; - int dist = RgbaColor32.Dot(BC7Utils.Quantize(color, colorDepth, alphaDepth).GetColor32(), blockDir); + int dist = RgbaColor32.Dot(BC67Utils.Quantize(color, colorDepth, alphaDepth).GetColor32(), blockDir); if (minDist >= dist) { @@ -1019,7 +947,7 @@ namespace Ryujinx.Graphics.Texture.Encoders int pBit0 = GetPBit(candidateE0.ToUInt32(), colorDepth, alphaDepth); int pBit1 = GetPBit(candidateE1.ToUInt32(), colorDepth, alphaDepth); - int errorSum = BC7Utils.SelectIndices( + int errorSum = BC67Utils.SelectIndices( MemoryMarshal.Cast(values), candidateE0.ToUInt32(), candidateE1.ToUInt32(), diff --git a/Ryujinx.Graphics.Texture/Encoders/BC7Tables.cs b/Ryujinx.Graphics.Texture/Utils/BC67Tables.cs similarity index 97% rename from Ryujinx.Graphics.Texture/Encoders/BC7Tables.cs rename to Ryujinx.Graphics.Texture/Utils/BC67Tables.cs index 6dddd283c..d890652cb 100644 --- a/Ryujinx.Graphics.Texture/Encoders/BC7Tables.cs +++ b/Ryujinx.Graphics.Texture/Utils/BC67Tables.cs @@ -1,7 +1,19 @@ -namespace 
Ryujinx.Graphics.Texture.Encoders +namespace Ryujinx.Graphics.Texture.Utils { - static class BC7Tables + static class BC67Tables { + public static readonly BC7ModeInfo[] BC7ModeInfos = new BC7ModeInfo[] + { + new BC7ModeInfo(3, 4, 6, 0, 0, 3, 0, 4, 0), + new BC7ModeInfo(2, 6, 2, 0, 0, 3, 0, 6, 0), + new BC7ModeInfo(3, 6, 0, 0, 0, 2, 0, 5, 0), + new BC7ModeInfo(2, 6, 4, 0, 0, 2, 0, 7, 0), + new BC7ModeInfo(1, 0, 0, 2, 1, 2, 3, 5, 6), + new BC7ModeInfo(1, 0, 0, 2, 0, 2, 2, 7, 8), + new BC7ModeInfo(1, 0, 2, 0, 0, 4, 0, 7, 7), + new BC7ModeInfo(2, 6, 4, 0, 0, 2, 0, 5, 5) + }; + public static readonly byte[][] Weights = { new byte[] { 0, 21, 43, 64 }, diff --git a/Ryujinx.Graphics.Texture/Encoders/BC7Utils.cs b/Ryujinx.Graphics.Texture/Utils/BC67Utils.cs similarity index 96% rename from Ryujinx.Graphics.Texture/Encoders/BC7Utils.cs rename to Ryujinx.Graphics.Texture/Utils/BC67Utils.cs index c43d90fb7..e6c3f6e76 100644 --- a/Ryujinx.Graphics.Texture/Encoders/BC7Utils.cs +++ b/Ryujinx.Graphics.Texture/Utils/BC67Utils.cs @@ -4,14 +4,14 @@ using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; -namespace Ryujinx.Graphics.Texture.Encoders +namespace Ryujinx.Graphics.Texture.Utils { - static class BC7Utils + static class BC67Utils { private static byte[][] _quantizationLut; private static byte[][] _quantizationLutNoPBit; - static BC7Utils() + static BC67Utils() { _quantizationLut = new byte[5][]; _quantizationLutNoPBit = new byte[5][]; @@ -322,7 +322,7 @@ namespace Ryujinx.Graphics.Texture.Encoders Vector128 rWeights; Vector128 lWeights; - fixed (byte* pWeights = BC7Tables.Weights[0], pInvWeights = BC7Tables.InverseWeights[0]) + fixed (byte* pWeights = BC67Tables.Weights[0], pInvWeights = BC67Tables.InverseWeights[0]) { rWeights = Sse2.LoadScalarVector128((uint*)pWeights).AsByte(); lWeights = Sse2.LoadScalarVector128((uint*)pInvWeights).AsByte(); @@ -394,7 +394,7 @@ namespace Ryujinx.Graphics.Texture.Encoders Vector128 
rWeights; Vector128 lWeights; - fixed (byte* pWeights = BC7Tables.Weights[1], pInvWeights = BC7Tables.InverseWeights[1]) + fixed (byte* pWeights = BC67Tables.Weights[1], pInvWeights = BC67Tables.InverseWeights[1]) { rWeights = Sse2.LoadScalarVector128((ulong*)pWeights).AsByte(); lWeights = Sse2.LoadScalarVector128((ulong*)pInvWeights).AsByte(); @@ -476,7 +476,7 @@ namespace Ryujinx.Graphics.Texture.Encoders Vector128 rWeights; Vector128 lWeights; - fixed (byte* pWeights = BC7Tables.Weights[2], pInvWeights = BC7Tables.InverseWeights[2]) + fixed (byte* pWeights = BC67Tables.Weights[2], pInvWeights = BC67Tables.InverseWeights[2]) { rWeights = Sse2.LoadVector128(pWeights); lWeights = Sse2.LoadVector128(pInvWeights); @@ -726,7 +726,7 @@ namespace Ryujinx.Graphics.Texture.Encoders int pBits, uint alphaMask) { - byte[] partitionTable = BC7Tables.PartitionTable[subsetCount - 1][partition]; + byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; uint alphaMaskForPalette = alphaMask; @@ -762,7 +762,7 @@ namespace Ryujinx.Graphics.Texture.Encoders Vector128 rWeights; Vector128 lWeights; - fixed (byte* pWeights = BC7Tables.Weights[0], pInvWeights = BC7Tables.InverseWeights[0]) + fixed (byte* pWeights = BC67Tables.Weights[0], pInvWeights = BC67Tables.InverseWeights[0]) { rWeights = Sse2.LoadScalarVector128((uint*)pWeights).AsByte(); lWeights = Sse2.LoadScalarVector128((uint*)pInvWeights).AsByte(); @@ -831,7 +831,7 @@ namespace Ryujinx.Graphics.Texture.Encoders int pBits, uint alphaMask) { - byte[] partitionTable = BC7Tables.PartitionTable[subsetCount - 1][partition]; + byte[] partitionTable = BC67Tables.PartitionTable[subsetCount - 1][partition]; uint alphaMaskForPalette = alphaMask; @@ -867,7 +867,7 @@ namespace Ryujinx.Graphics.Texture.Encoders Vector128 rWeights; Vector128 lWeights; - fixed (byte* pWeights = BC7Tables.Weights[1], pInvWeights = BC7Tables.InverseWeights[1]) + fixed (byte* pWeights = BC67Tables.Weights[1], pInvWeights = 
BC67Tables.InverseWeights[1]) { rWeights = Sse2.LoadScalarVector128((ulong*)pWeights).AsByte(); lWeights = Sse2.LoadScalarVector128((ulong*)pInvWeights).AsByte(); @@ -973,7 +973,7 @@ namespace Ryujinx.Graphics.Texture.Encoders Vector128 rWeights; Vector128 lWeights; - fixed (byte* pWeights = BC7Tables.Weights[2], pInvWeights = BC7Tables.InverseWeights[2]) + fixed (byte* pWeights = BC67Tables.Weights[2], pInvWeights = BC67Tables.InverseWeights[2]) { rWeights = Sse2.LoadVector128(pWeights); lWeights = Sse2.LoadVector128(pInvWeights); @@ -1129,7 +1129,7 @@ namespace Ryujinx.Graphics.Texture.Encoders { for (int tx = 0; tx < w; tx++) { - int subset = BC7Tables.PartitionTable[subsetCount - 1][partition][ty * 4 + tx]; + int subset = BC67Tables.PartitionTable[subsetCount - 1][partition][ty * 4 + tx]; uint color = tile[i++] | alphaMask; int bestMatchScore = int.MaxValue; @@ -1182,6 +1182,28 @@ namespace Ryujinx.Graphics.Texture.Encoders return (color1 * invWeightV + color2 * weightV + new RgbaColor32(32)) >> 6; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static RgbaColor32 Interpolate( + RgbaColor32 color1, + RgbaColor32 color2, + int colorWeightIndex, + int alphaWeightIndex, + int colorIndexBitCount, + int alphaIndexBitCount) + { + Debug.Assert(colorIndexBitCount >= 2 && colorIndexBitCount <= 4); + Debug.Assert(alphaIndexBitCount >= 2 && alphaIndexBitCount <= 4); + + int colorWeight = BC67Tables.Weights[colorIndexBitCount - 2][colorWeightIndex]; + int alphaWeight = BC67Tables.Weights[alphaIndexBitCount - 2][alphaWeightIndex]; + + RgbaColor32 weightV = new RgbaColor32(colorWeight); + weightV.A = alphaWeight; + RgbaColor32 invWeightV = new RgbaColor32(64) - weightV; + + return (color1 * invWeightV + color2 * weightV + new RgbaColor32(32)) >> 6; + } + public static RgbaColor8 Quantize(RgbaColor8 color, int colorBits, int alphaBits, int pBit = -1) { if (alphaBits == 0) diff --git a/Ryujinx.Graphics.Texture/Utils/BC7ModeInfo.cs 
b/Ryujinx.Graphics.Texture/Utils/BC7ModeInfo.cs new file mode 100644 index 000000000..749324bf0 --- /dev/null +++ b/Ryujinx.Graphics.Texture/Utils/BC7ModeInfo.cs @@ -0,0 +1,37 @@ +namespace Ryujinx.Graphics.Texture.Utils +{ + struct BC7ModeInfo + { + public readonly int SubsetCount; + public readonly int PartitionBitCount; + public readonly int PBits; + public readonly int RotationBitCount; + public readonly int IndexModeBitCount; + public readonly int ColorIndexBitCount; + public readonly int AlphaIndexBitCount; + public readonly int ColorDepth; + public readonly int AlphaDepth; + + public BC7ModeInfo( + int subsetCount, + int partitionBitsCount, + int pBits, + int rotationBitCount, + int indexModeBitCount, + int colorIndexBitCount, + int alphaIndexBitCount, + int colorDepth, + int alphaDepth) + { + SubsetCount = subsetCount; + PartitionBitCount = partitionBitsCount; + PBits = pBits; + RotationBitCount = rotationBitCount; + IndexModeBitCount = indexModeBitCount; + ColorIndexBitCount = colorIndexBitCount; + AlphaIndexBitCount = alphaIndexBitCount; + ColorDepth = colorDepth; + AlphaDepth = alphaDepth; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Texture/Utils/Block.cs b/Ryujinx.Graphics.Texture/Utils/Block.cs new file mode 100644 index 000000000..a8bae077d --- /dev/null +++ b/Ryujinx.Graphics.Texture/Utils/Block.cs @@ -0,0 +1,55 @@ +namespace Ryujinx.Graphics.Texture.Utils +{ + struct Block + { + public ulong Low; + public ulong High; + + public void Encode(ulong value, ref int offset, int bits) + { + if (offset >= 64) + { + High |= value << (offset - 64); + } + else + { + Low |= value << offset; + + if (offset + bits > 64) + { + int remainder = 64 - offset; + High |= value >> remainder; + } + } + + offset += bits; + } + + public ulong Decode(ref int offset, int bits) + { + ulong value; + ulong mask = bits == 64 ? 
ulong.MaxValue : (1UL << bits) - 1; + + if (offset >= 64) + { + value = (High >> (offset - 64)) & mask; + } + else + { + value = Low >> offset; + + if (offset + bits > 64) + { + int remainder = 64 - offset; + value |= High << remainder; + } + + value &= mask; + } + + offset += bits; + + return value; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Texture/Encoders/RgbaColor32.cs b/Ryujinx.Graphics.Texture/Utils/RgbaColor32.cs similarity index 89% rename from Ryujinx.Graphics.Texture/Encoders/RgbaColor32.cs rename to Ryujinx.Graphics.Texture/Utils/RgbaColor32.cs index ed4b7507e..412493274 100644 --- a/Ryujinx.Graphics.Texture/Encoders/RgbaColor32.cs +++ b/Ryujinx.Graphics.Texture/Utils/RgbaColor32.cs @@ -3,16 +3,35 @@ using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; -namespace Ryujinx.Graphics.Texture.Encoders +namespace Ryujinx.Graphics.Texture.Utils { struct RgbaColor32 : IEquatable { - private readonly Vector128 _color; + private Vector128 _color; - public int R => _color.GetElement(0); - public int G => _color.GetElement(1); - public int B => _color.GetElement(2); - public int A => _color.GetElement(3); + public int R + { + get => _color.GetElement(0); + set => _color = _color.WithElement(0, value); + } + + public int G + { + get => _color.GetElement(1); + set => _color = _color.WithElement(1, value); + } + + public int B + { + get => _color.GetElement(2); + set => _color = _color.WithElement(2, value); + } + + public int A + { + get => _color.GetElement(3); + set => _color = _color.WithElement(3, value); + } public RgbaColor32(Vector128 color) { @@ -193,7 +212,7 @@ namespace Ryujinx.Graphics.Texture.Encoders return HashCode.Combine(R, G, B, A); } - public override bool Equals(object obj) + public override bool Equals(object? 
obj) { return obj is RgbaColor32 other && Equals(other); } diff --git a/Ryujinx.Graphics.Texture/Encoders/RgbaColor8.cs b/Ryujinx.Graphics.Texture/Utils/RgbaColor8.cs similarity index 97% rename from Ryujinx.Graphics.Texture/Encoders/RgbaColor8.cs rename to Ryujinx.Graphics.Texture/Utils/RgbaColor8.cs index bbf3c086a..5f7dfb4b4 100644 --- a/Ryujinx.Graphics.Texture/Encoders/RgbaColor8.cs +++ b/Ryujinx.Graphics.Texture/Utils/RgbaColor8.cs @@ -3,7 +3,7 @@ using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; -namespace Ryujinx.Graphics.Texture.Encoders +namespace Ryujinx.Graphics.Texture.Utils { struct RgbaColor8 : IEquatable { diff --git a/Ryujinx.Graphics.Vulkan/FormatCapabilities.cs b/Ryujinx.Graphics.Vulkan/FormatCapabilities.cs index be2ead0bf..ea1f710a5 100644 --- a/Ryujinx.Graphics.Vulkan/FormatCapabilities.cs +++ b/Ryujinx.Graphics.Vulkan/FormatCapabilities.cs @@ -20,7 +20,20 @@ namespace Ryujinx.Graphics.Vulkan _table = new FormatFeatureFlags[Enum.GetNames(typeof(GAL.Format)).Length]; } - public bool FormatSupports(GAL.Format format, FormatFeatureFlags flags) + public bool FormatsSupports(FormatFeatureFlags flags, params GAL.Format[] formats) + { + foreach (GAL.Format format in formats) + { + if (!FormatSupports(flags, format)) + { + return false; + } + } + + return true; + } + + public bool FormatSupports(FormatFeatureFlags flags, GAL.Format format) { var formatFeatureFlags = _table[(int)format]; @@ -56,7 +69,7 @@ namespace Ryujinx.Graphics.Vulkan requiredFeatures |= FormatFeatureFlags.FormatFeatureStorageImageBit; } - if (!FormatSupports(srcFormat, requiredFeatures) || + if (!FormatSupports(requiredFeatures, srcFormat) || (srcFormat == GAL.Format.D24UnormS8Uint && VulkanConfiguration.ForceD24S8Unsupported)) { // The format is not supported. Can we convert it to a higher precision format? 
diff --git a/Ryujinx.Graphics.Vulkan/TextureView.cs b/Ryujinx.Graphics.Vulkan/TextureView.cs index 7e1350eea..1f5009127 100644 --- a/Ryujinx.Graphics.Vulkan/TextureView.cs +++ b/Ryujinx.Graphics.Vulkan/TextureView.cs @@ -298,8 +298,8 @@ namespace Ryujinx.Graphics.Vulkan return; } - else if (_gd.FormatCapabilities.FormatSupports(srcFormat, FormatFeatureFlags.FormatFeatureBlitSrcBit) && - _gd.FormatCapabilities.FormatSupports(dstFormat, FormatFeatureFlags.FormatFeatureBlitDstBit)) + else if (_gd.FormatCapabilities.FormatSupports(FormatFeatureFlags.FormatFeatureBlitSrcBit, srcFormat) && + _gd.FormatCapabilities.FormatSupports(FormatFeatureFlags.FormatFeatureBlitDstBit, dstFormat)) { TextureCopy.Blit( _gd.Api, @@ -604,8 +604,8 @@ namespace Ryujinx.Graphics.Vulkan private bool SupportsBlitFromD32FS8ToD32FAndS8() { var formatFeatureFlags = FormatFeatureFlags.FormatFeatureBlitSrcBit | FormatFeatureFlags.FormatFeatureBlitDstBit; - return _gd.FormatCapabilities.FormatSupports(GAL.Format.D32Float, formatFeatureFlags) && - _gd.FormatCapabilities.FormatSupports(GAL.Format.S8Uint, formatFeatureFlags); + return _gd.FormatCapabilities.FormatSupports(formatFeatureFlags, GAL.Format.D32Float) && + _gd.FormatCapabilities.FormatSupports(formatFeatureFlags, GAL.Format.S8Uint); } public TextureView GetView(GAL.Format format) diff --git a/Ryujinx.Graphics.Vulkan/VulkanGraphicsDevice.cs b/Ryujinx.Graphics.Vulkan/VulkanGraphicsDevice.cs index 2e715fdf7..30fa8626d 100644 --- a/Ryujinx.Graphics.Vulkan/VulkanGraphicsDevice.cs +++ b/Ryujinx.Graphics.Vulkan/VulkanGraphicsDevice.cs @@ -315,6 +315,33 @@ namespace Ryujinx.Graphics.Vulkan public Capabilities GetCapabilities() { + FormatFeatureFlags compressedFormatFeatureFlags = + FormatFeatureFlags.FormatFeatureSampledImageBit | + FormatFeatureFlags.FormatFeatureSampledImageFilterLinearBit | + FormatFeatureFlags.FormatFeatureBlitSrcBit | + FormatFeatureFlags.FormatFeatureTransferSrcBit | + FormatFeatureFlags.FormatFeatureTransferDstBit; + + bool 
supportsBc123CompressionFormat = FormatCapabilities.FormatsSupports(compressedFormatFeatureFlags, + GAL.Format.Bc1RgbaSrgb, + GAL.Format.Bc1RgbaUnorm, + GAL.Format.Bc2Srgb, + GAL.Format.Bc2Unorm, + GAL.Format.Bc3Srgb, + GAL.Format.Bc3Unorm); + + bool supportsBc45CompressionFormat = FormatCapabilities.FormatsSupports(compressedFormatFeatureFlags, + GAL.Format.Bc4Snorm, + GAL.Format.Bc4Unorm, + GAL.Format.Bc5Snorm, + GAL.Format.Bc5Unorm); + + bool supportsBc67CompressionFormat = FormatCapabilities.FormatsSupports(compressedFormatFeatureFlags, + GAL.Format.Bc6HSfloat, + GAL.Format.Bc6HUfloat, + GAL.Format.Bc7Srgb, + GAL.Format.Bc7Unorm); + Api.GetPhysicalDeviceFeatures(_physicalDevice, out var features); Api.GetPhysicalDeviceProperties(_physicalDevice, out var properties); @@ -326,6 +353,9 @@ namespace Ryujinx.Graphics.Vulkan hasFrontFacingBug: IsIntelWindows, hasVectorIndexingBug: Vendor == Vendor.Qualcomm, supportsAstcCompression: features.TextureCompressionAstcLdr, + supportsBc123Compression: supportsBc123CompressionFormat, + supportsBc45Compression: supportsBc45CompressionFormat, + supportsBc67Compression: supportsBc67CompressionFormat, supports3DTextureCompression: true, supportsBgraFormat: true, supportsR4G4Format: false,