ARM exclusive monitor and multicore fixes (#589)

* Implement ARM exclusive load/store with compare exchange insts, and enable multicore by default

* Fix comment typo

* Support Linux and OSX on MemoryAlloc and CompareExchange128, some cleanup

* Use intel syntax on assembly code

* Adjust indentation

* Add CPUID check and fix exclusive reservation granule size

* Update schema multicore scheduling default value

* Make the cpu id check code lower case as well
This commit is contained in:
gdkchan 2019-02-18 20:52:06 -03:00 committed by jduncanator
parent dd00a4b62d
commit 932224f051
19 changed files with 954 additions and 261 deletions

View file

@ -14,6 +14,7 @@
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Mono.Posix.NETStandard" Version="1.0.0" />
<PackageReference Include="System.Runtime.Intrinsics.Experimental" Version="4.5.0-rc1" /> <PackageReference Include="System.Runtime.Intrinsics.Experimental" Version="4.5.0-rc1" />
</ItemGroup> </ItemGroup>

View file

@ -32,8 +32,6 @@ namespace ChocolArm64
{ {
translator.ExecuteSubroutine(this, entrypoint); translator.ExecuteSubroutine(this, entrypoint);
memory.RemoveMonitor(ThreadState.Core);
WorkFinished?.Invoke(this, EventArgs.Empty); WorkFinished?.Invoke(this, EventArgs.Empty);
}); });
} }

View file

@ -23,7 +23,9 @@ namespace ChocolArm64.Instructions
public static void Clrex(ILEmitterCtx context) public static void Clrex(ILEmitterCtx context)
{ {
EmitMemoryCall(context, nameof(MemoryManager.ClearExclusive)); context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.ClearExclusiveAddress));
} }
public static void Dmb(ILEmitterCtx context) => EmitBarrier(context); public static void Dmb(ILEmitterCtx context) => EmitBarrier(context);
@ -37,12 +39,12 @@ namespace ChocolArm64.Instructions
private static void EmitLdr(ILEmitterCtx context, AccessType accType) private static void EmitLdr(ILEmitterCtx context, AccessType accType)
{ {
EmitLoad(context, accType, false); EmitLoad(context, accType, pair: false);
} }
private static void EmitLdp(ILEmitterCtx context, AccessType accType) private static void EmitLdp(ILEmitterCtx context, AccessType accType)
{ {
EmitLoad(context, accType, true); EmitLoad(context, accType, pair: true);
} }
private static void EmitLoad(ILEmitterCtx context, AccessType accType, bool pair) private static void EmitLoad(ILEmitterCtx context, AccessType accType, bool pair)
@ -57,32 +59,128 @@ namespace ChocolArm64.Instructions
EmitBarrier(context); EmitBarrier(context);
} }
if (exclusive)
{
EmitMemoryCall(context, nameof(MemoryManager.SetExclusive), op.Rn);
}
context.EmitLdint(op.Rn); context.EmitLdint(op.Rn);
context.EmitSttmp(); context.EmitSttmp();
context.EmitLdarg(TranslatedSub.MemoryArgIdx); if (exclusive)
{
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdtmp(); context.EmitLdtmp();
EmitReadZxCall(context, op.Size); context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.SetExclusiveAddress));
}
context.EmitStintzr(op.Rt); void WriteExclusiveValue(string propName)
{
if (op.Size < 3)
{
context.Emit(OpCodes.Conv_U8);
}
context.EmitSttmp2();
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdtmp2();
context.EmitCallPrivatePropSet(typeof(CpuThreadState), propName);
context.EmitLdtmp2();
if (op.Size < 3)
{
context.Emit(OpCodes.Conv_U4);
}
}
if (pair) if (pair)
{
//Exclusive loads should be atomic. For pairwise loads, we need to
//read all the data at once. For a 32-bits pairwise load, we do a
//simple 64-bits load, for a 128-bits load, we need to call a special
//method to read 128-bits atomically.
if (op.Size == 2)
{ {
context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp(); context.EmitLdtmp();
context.EmitLdc_I8(1 << op.Size);
context.Emit(OpCodes.Add); EmitReadZxCall(context, 3);
context.Emit(OpCodes.Dup);
//Mask low half.
context.Emit(OpCodes.Conv_U4);
if (exclusive)
{
WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow));
}
context.EmitStintzr(op.Rt);
//Shift high half.
context.EmitLsr(32);
context.Emit(OpCodes.Conv_U4);
if (exclusive)
{
WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueHigh));
}
context.EmitStintzr(op.Rt2);
}
else if (op.Size == 3)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp();
context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicReadInt128));
context.Emit(OpCodes.Dup);
//Load low part of the vector.
context.EmitLdc_I4(0);
context.EmitLdc_I4(3);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorExtractIntZx));
if (exclusive)
{
WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow));
}
context.EmitStintzr(op.Rt);
//Load high part of the vector.
context.EmitLdc_I4(1);
context.EmitLdc_I4(3);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorExtractIntZx));
if (exclusive)
{
WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueHigh));
}
context.EmitStintzr(op.Rt2);
}
else
{
throw new InvalidOperationException($"Invalid store size of {1 << op.Size} bytes.");
}
}
else
{
//8, 16, 32 or 64-bits (non-pairwise) load.
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdtmp();
EmitReadZxCall(context, op.Size); EmitReadZxCall(context, op.Size);
context.EmitStintzr(op.Rt2); if (exclusive)
{
WriteExclusiveValue(nameof(CpuThreadState.ExclusiveValueLow));
}
context.EmitStintzr(op.Rt);
} }
} }
@ -99,12 +197,12 @@ namespace ChocolArm64.Instructions
private static void EmitStr(ILEmitterCtx context, AccessType accType) private static void EmitStr(ILEmitterCtx context, AccessType accType)
{ {
EmitStore(context, accType, false); EmitStore(context, accType, pair: false);
} }
private static void EmitStp(ILEmitterCtx context, AccessType accType) private static void EmitStp(ILEmitterCtx context, AccessType accType)
{ {
EmitStore(context, accType, true); EmitStore(context, accType, pair: true);
} }
private static void EmitStore(ILEmitterCtx context, AccessType accType, bool pair) private static void EmitStore(ILEmitterCtx context, AccessType accType, bool pair)
@ -119,66 +217,133 @@ namespace ChocolArm64.Instructions
EmitBarrier(context); EmitBarrier(context);
} }
if (exclusive)
{
ILLabel lblEx = new ILLabel(); ILLabel lblEx = new ILLabel();
ILLabel lblEnd = new ILLabel(); ILLabel lblEnd = new ILLabel();
if (exclusive) context.EmitLdarg(TranslatedSub.StateArgIdx);
{ context.EmitLdint(op.Rn);
EmitMemoryCall(context, nameof(MemoryManager.TestExclusive), op.Rn);
context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.CheckExclusiveAddress));
context.Emit(OpCodes.Brtrue_S, lblEx); context.Emit(OpCodes.Brtrue_S, lblEx);
context.EmitLdc_I8(1); //Address check failed, set error right away and do not store anything.
context.EmitLdc_I4(1);
context.EmitStintzr(op.Rs); context.EmitStintzr(op.Rs);
context.Emit(OpCodes.Br_S, lblEnd); context.Emit(OpCodes.Br, lblEnd);
}
//Address check passed.
context.MarkLabel(lblEx); context.MarkLabel(lblEx);
context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(op.Rn); context.EmitLdint(op.Rn);
context.EmitLdintzr(op.Rt);
EmitWriteCall(context, op.Size); context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitCallPrivatePropGet(typeof(CpuThreadState), nameof(CpuThreadState.ExclusiveValueLow));
void EmitCast()
{
//The input should be always int64.
switch (op.Size)
{
case 0: context.Emit(OpCodes.Conv_U1); break;
case 1: context.Emit(OpCodes.Conv_U2); break;
case 2: context.Emit(OpCodes.Conv_U4); break;
}
}
EmitCast();
if (pair) if (pair)
{ {
context.EmitLdarg(TranslatedSub.MemoryArgIdx); context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitLdint(op.Rn);
context.EmitLdc_I8(1 << op.Size);
context.Emit(OpCodes.Add); context.EmitCallPrivatePropGet(typeof(CpuThreadState), nameof(CpuThreadState.ExclusiveValueHigh));
EmitCast();
context.EmitLdintzr(op.Rt);
EmitCast();
context.EmitLdintzr(op.Rt2); context.EmitLdintzr(op.Rt2);
EmitCast();
switch (op.Size)
{
case 2: context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchange2xInt32)); break;
case 3: context.EmitPrivateCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt128)); break;
default: throw new InvalidOperationException($"Invalid store size of {1 << op.Size} bytes.");
}
}
else
{
context.EmitLdintzr(op.Rt);
EmitCast();
switch (op.Size)
{
case 0: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeByte)); break;
case 1: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt16)); break;
case 2: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt32)); break;
case 3: context.EmitCall(typeof(MemoryManager), nameof(MemoryManager.AtomicCompareExchangeInt64)); break;
default: throw new InvalidOperationException($"Invalid store size of {1 << op.Size} bytes.");
}
}
//The value returned is a bool, true if the values compared
//were equal and the new value was written, false otherwise.
//We need to invert this result, as on ARM 1 indicates failure,
//and 0 success on those instructions.
context.EmitLdc_I4(1);
context.Emit(OpCodes.Xor);
context.Emit(OpCodes.Dup);
context.Emit(OpCodes.Conv_U8);
context.EmitStintzr(op.Rs);
//Only clear the exclusive monitor if the store was successful (Rs = false).
context.Emit(OpCodes.Brtrue_S, lblEnd);
Clrex(context);
context.MarkLabel(lblEnd);
}
else
{
void EmitWrite(int rt, long offset)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdint(op.Rn);
if (offset != 0)
{
context.EmitLdc_I8(offset);
context.Emit(OpCodes.Add);
}
context.EmitLdintzr(rt);
EmitWriteCall(context, op.Size); EmitWriteCall(context, op.Size);
} }
if (exclusive) EmitWrite(op.Rt, 0);
if (pair)
{ {
context.EmitLdc_I8(0); EmitWrite(op.Rt2, 1 << op.Size);
context.EmitStintzr(op.Rs);
EmitMemoryCall(context, nameof(MemoryManager.ClearExclusiveForStore));
} }
context.MarkLabel(lblEnd);
} }
private static void EmitMemoryCall(ILEmitterCtx context, string name, int rn = -1)
{
context.EmitLdarg(TranslatedSub.MemoryArgIdx);
context.EmitLdarg(TranslatedSub.StateArgIdx);
context.EmitCallPropGet(typeof(CpuThreadState), nameof(CpuThreadState.Core));
if (rn != -1)
{
context.EmitLdint(rn);
}
context.EmitCall(typeof(MemoryManager), name);
} }
private static void EmitBarrier(ILEmitterCtx context) private static void EmitBarrier(ILEmitterCtx context)

View file

@ -0,0 +1,151 @@
using System;
using System.Runtime.InteropServices;
namespace ChocolArm64.Memory
{
//Provides a 128-bit atomic compare-exchange (and an atomic 128-bit read built on top of it)
//by mapping a small hand-written x86-64 stub that executes "lock cmpxchg16b".
//The static constructor throws PlatformNotSupportedException when the process is not x64,
//when the CPU does not report CMPXCHG16B support, or when the OS is not Windows, Linux or OSX.
static class CompareExchange128
{
    //128-bit value split in two 64-bit halves, used as the argument/return type of the native stub.
    private struct Int128
    {
        public ulong Low { get; }
        public ulong High { get; }

        public Int128(ulong low, ulong high)
        {
            Low = low;
            High = high;
        }
    }

    //Signature of the native compare-exchange stub. It returns the value that was found at "address".
    private delegate Int128 InterlockedCompareExchange(IntPtr address, Int128 expected, Int128 desired);

    //Signature of the native CPUID stub. It returns ECX of CPUID leaf 1.
    private delegate int GetCpuId();

    private static readonly InterlockedCompareExchange _interlockedCompareExchange;

    static CompareExchange128()
    {
        //The stubs below are x86-64 machine code, so what matters is the architecture of the
        //running process. Checking the OS architecture instead would let a 32-bit process on a
        //64-bit OS through and make it execute 64-bit code.
        if (RuntimeInformation.ProcessArchitecture != Architecture.X64 || !IsCmpxchg16bSupported())
        {
            throw new PlatformNotSupportedException();
        }

        byte[] interlockedCompareExchange128Code;

        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            //NOTE(review): register usage matches the Windows x64 convention where rcx is the
            //hidden return buffer, rdx the address, and r8/r9 point to expected/desired - confirm.
            interlockedCompareExchange128Code = new byte[]
            {
                0x53,                         // push rbx
                0x49, 0x8b, 0x00,             // mov rax, [r8]
                0x49, 0x8b, 0x19,             // mov rbx, [r9]
                0x49, 0x89, 0xca,             // mov r10, rcx
                0x49, 0x89, 0xd3,             // mov r11, rdx
                0x49, 0x8b, 0x49, 0x08,       // mov rcx, [r9+8]
                0x49, 0x8b, 0x50, 0x08,       // mov rdx, [r8+8]
                0xf0, 0x49, 0x0f, 0xc7, 0x0b, // lock cmpxchg16b [r11]
                0x49, 0x89, 0x02,             // mov [r10], rax
                0x4c, 0x89, 0xd0,             // mov rax, r10
                0x49, 0x89, 0x52, 0x08,       // mov [r10+8], rdx
                0x5b,                         // pop rbx
                0xc3                          // ret
            };
        }
        else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
                 RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
        {
            //NOTE(review): register usage matches the System V x64 convention where rdi is the
            //address, rsi:rdx is expected, rcx:r8 is desired and rax:rdx is the result - confirm.
            interlockedCompareExchange128Code = new byte[]
            {
                0x53,                         // push rbx
                0x49, 0x89, 0xd1,             // mov r9, rdx
                0x48, 0x89, 0xcb,             // mov rbx, rcx
                0x48, 0x89, 0xf0,             // mov rax, rsi
                0x4c, 0x89, 0xca,             // mov rdx, r9
                0x4c, 0x89, 0xc1,             // mov rcx, r8
                0xf0, 0x48, 0x0f, 0xc7, 0x0f, // lock cmpxchg16b [rdi]
                0x5b,                         // pop rbx
                0xc3                          // ret
            };
        }
        else
        {
            throw new PlatformNotSupportedException();
        }

        //This mapping is kept for the lifetime of the process, the delegate points into it.
        IntPtr funcPtr = MapCodeAsExecutable(interlockedCompareExchange128Code);

        _interlockedCompareExchange = Marshal.GetDelegateForFunctionPointer<InterlockedCompareExchange>(funcPtr);
    }

    //Runs CPUID leaf 1 through a temporary native stub and tests bit 13 of ECX,
    //which is the CMPXCHG16B feature flag.
    private static bool IsCmpxchg16bSupported()
    {
        byte[] getCpuIdCode = new byte[]
        {
            0x53,                         // push rbx
            0xb8, 0x01, 0x00, 0x00, 0x00, // mov eax, 0x1
            0x0f, 0xa2,                   // cpuid
            0x89, 0xc8,                   // mov eax, ecx
            0x5b,                         // pop rbx
            0xc3                          // ret
        };

        IntPtr funcPtr = MapCodeAsExecutable(getCpuIdCode);

        try
        {
            GetCpuId getCpuId = Marshal.GetDelegateForFunctionPointer<GetCpuId>(funcPtr);

            int cpuId = getCpuId();

            return (cpuId & (1 << 13)) != 0;
        }
        finally
        {
            //The stub is only needed once, release it even if the call above fails.
            MemoryAlloc.Free(funcPtr);
        }
    }

    //Copies "code" into freshly allocated memory and reprotects that memory as executable.
    //MemoryAlloc.Reprotect throws MemoryProtectionException when the protection change fails.
    private static IntPtr MapCodeAsExecutable(byte[] code)
    {
        ulong codeLength = (ulong)code.Length;

        IntPtr funcPtr = MemoryAlloc.Allocate(codeLength);

        unsafe
        {
            fixed (byte* codePtr = code)
            {
                byte* dest = (byte*)funcPtr;

                long size = (long)codeLength;

                Buffer.MemoryCopy(codePtr, dest, size, size);
            }
        }

        MemoryAlloc.Reprotect(funcPtr, codeLength, MemoryProtection.Execute);

        return funcPtr;
    }

    //Atomically replaces the 128-bit value at "address" with "desired" when it equals "expected".
    //Returns true when the value found at "address" was equal to "expected" (the exchange happened).
    public static bool InterlockedCompareExchange128(
        IntPtr address,
        ulong expectedLow,
        ulong expectedHigh,
        ulong desiredLow,
        ulong desiredHigh)
    {
        Int128 expected = new Int128(expectedLow, expectedHigh);
        Int128 desired = new Int128(desiredLow, desiredHigh);

        Int128 old = _interlockedCompareExchange(address, expected, desired);

        return old.Low == expected.Low && old.High == expected.High;
    }

    //Atomically reads the 128-bit value at "address".
    //This is a compare-exchange with expected == desired == 0: memory keeps its value either
    //way, and the stub returns what was found.
    //NOTE(review): cmpxchg16b is a read-modify-write instruction, so "address" presumably
    //has to be writable - confirm against callers.
    public static void InterlockedRead128(IntPtr address, out ulong low, out ulong high)
    {
        Int128 zero = new Int128(0, 0);

        Int128 old = _interlockedCompareExchange(address, zero, zero);

        low = old.Low;
        high = old.High;
    }
}
}

View file

@ -0,0 +1,114 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace ChocolArm64.Memory
{
//Platform independent front-end for the OS specific memory allocators.
//Windows requests go to MemoryAllocWindows, Linux and OSX requests go to MemoryAllocUnix,
//any other platform gets a PlatformNotSupportedException.
public static class MemoryAlloc
{
    public static bool HasWriteWatchSupport => IsWindows();

    private static bool IsWindows()
    {
        return RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
    }

    private static bool IsUnixLike()
    {
        return RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
               RuntimeInformation.IsOSPlatform(OSPlatform.OSX);
    }

    //Allocates "size" bytes of memory and returns its address.
    public static IntPtr Allocate(ulong size)
    {
        if (IsWindows())
        {
            return MemoryAllocWindows.Allocate(new IntPtr((long)size));
        }

        if (IsUnixLike())
        {
            return MemoryAllocUnix.Allocate(size);
        }

        throw new PlatformNotSupportedException();
    }

    //Allocates "size" bytes of memory with write tracking where the OS supports it.
    //On Linux and OSX this is a regular allocation.
    public static IntPtr AllocateWriteTracked(ulong size)
    {
        if (IsWindows())
        {
            return MemoryAllocWindows.AllocateWriteTracked(new IntPtr((long)size));
        }

        if (IsUnixLike())
        {
            return MemoryAllocUnix.Allocate(size);
        }

        throw new PlatformNotSupportedException();
    }

    //Changes the protection of a memory range.
    //Throws MemoryProtectionException when the OS call reports failure.
    public static void Reprotect(IntPtr address, ulong size, MemoryProtection permission)
    {
        bool succeeded;

        if (IsWindows())
        {
            succeeded = MemoryAllocWindows.Reprotect(address, new IntPtr((long)size), permission);
        }
        else if (IsUnixLike())
        {
            succeeded = MemoryAllocUnix.Reprotect(address, size, permission);
        }
        else
        {
            throw new PlatformNotSupportedException();
        }

        if (succeeded)
        {
            return;
        }

        throw new MemoryProtectionException(permission);
    }

    //Releases memory returned by Allocate or AllocateWriteTracked.
    //Returns the success flag reported by the platform allocator.
    public static bool Free(IntPtr address)
    {
        if (IsWindows())
        {
            return MemoryAllocWindows.Free(address);
        }

        if (IsUnixLike())
        {
            return MemoryAllocUnix.Free(address);
        }

        throw new PlatformNotSupportedException();
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool GetModifiedPages(
        IntPtr address,
        IntPtr size,
        IntPtr[] addresses,
        out ulong count)
    {
        //This is only supported on windows, but returning
        //false (failed) is also valid for platforms without
        //write tracking support on the OS.
        if (!IsWindows())
        {
            count = 0;

            return false;
        }

        return MemoryAllocWindows.GetModifiedPages(address, size, addresses, out count);
    }
}
}

View file

@ -0,0 +1,70 @@
using Mono.Unix.Native;
using System;
namespace ChocolArm64.Memory
{
//mmap based allocator for Linux and OSX.
//Every allocation is preceded by one extra page. The requested size is stored in the
//last 8 bytes of that page so Free can recover the length munmap needs.
static class MemoryAllocUnix
{
    //mmap reports failure by returning MAP_FAILED, that is (void*)-1, not a null pointer.
    private static readonly IntPtr MapFailed = new IntPtr(-1);

    //Maps "size" bytes (plus the header page) as private, anonymous, readable and writable memory.
    //Returns the address right after the header page.
    //Throws OutOfMemoryException when the mapping can't be created.
    public static IntPtr Allocate(ulong size)
    {
        ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);

        const MmapProts prot = MmapProts.PROT_READ | MmapProts.PROT_WRITE;

        const MmapFlags flags = MmapFlags.MAP_PRIVATE | MmapFlags.MAP_ANONYMOUS;

        IntPtr ptr = Syscall.mmap(IntPtr.Zero, size + pageSize, prot, flags, -1, 0);

        //Checking only for a null pointer would miss every real failure and the
        //header write below would then go through an invalid pointer.
        if (ptr == MapFailed || ptr == IntPtr.Zero)
        {
            throw new OutOfMemoryException();
        }

        unsafe
        {
            //Skip the header page, then store the size just before the returned address.
            ptr = new IntPtr(ptr.ToInt64() + (long)pageSize);

            *((ulong*)ptr - 1) = size;
        }

        return ptr;
    }

    //Changes the protection of a range with mprotect, returns true on success.
    public static bool Reprotect(IntPtr address, ulong size, Memory.MemoryProtection protection)
    {
        MmapProts prot = GetProtection(protection);

        return Syscall.mprotect(address, size, prot) == 0;
    }

    //Converts the platform independent protection value into mmap protection flags.
    //Throws ArgumentException for combinations without a mapping below.
    private static MmapProts GetProtection(Memory.MemoryProtection protection)
    {
        switch (protection)
        {
            case Memory.MemoryProtection.None:           return MmapProts.PROT_NONE;
            case Memory.MemoryProtection.Read:           return MmapProts.PROT_READ;
            case Memory.MemoryProtection.ReadAndWrite:   return MmapProts.PROT_READ | MmapProts.PROT_WRITE;
            case Memory.MemoryProtection.ReadAndExecute: return MmapProts.PROT_READ | MmapProts.PROT_EXEC;
            case Memory.MemoryProtection.Execute:        return MmapProts.PROT_EXEC;

            default: throw new ArgumentException($"Invalid permission \"{protection}\".");
        }
    }

    //Unmaps memory returned by Allocate (header page included), returns true on success.
    public static bool Free(IntPtr address)
    {
        ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);

        ulong size;

        unsafe
        {
            //Read back the size stored by Allocate, then step back to the start of the mapping.
            size = *((ulong*)address - 1);

            address = new IntPtr(address.ToInt64() - (long)pageSize);
        }

        return Syscall.munmap(address, size + pageSize) == 0;
    }
}
}

View file

@ -0,0 +1,155 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace ChocolArm64.Memory
{
//VirtualAlloc based allocator for Windows, with optional write tracking (GetWriteWatch).
static class MemoryAllocWindows
{
    [Flags]
    private enum AllocationType : uint
    {
        Commit     = 0x1000,
        Reserve    = 0x2000,
        Decommit   = 0x4000,
        Release    = 0x8000,
        Reset      = 0x80000,
        Physical   = 0x400000,
        TopDown    = 0x100000,
        WriteWatch = 0x200000,
        LargePages = 0x20000000
    }

    [Flags]
    private enum MemoryProtection
    {
        NoAccess                 = 0x01,
        ReadOnly                 = 0x02,
        ReadWrite                = 0x04,
        WriteCopy                = 0x08,
        Execute                  = 0x10,
        ExecuteRead              = 0x20,
        ExecuteReadWrite         = 0x40,
        ExecuteWriteCopy         = 0x80,
        GuardModifierflag        = 0x100,
        NoCacheModifierflag      = 0x200,
        WriteCombineModifierflag = 0x400
    }

    private enum WriteWatchFlags : uint
    {
        None  = 0,
        Reset = 1
    }

    [DllImport("kernel32.dll")]
    private static extern IntPtr VirtualAlloc(
        IntPtr           lpAddress,
        IntPtr           dwSize,
        AllocationType   flAllocationType,
        MemoryProtection flProtect);

    [DllImport("kernel32.dll")]
    private static extern bool VirtualProtect(
        IntPtr               lpAddress,
        IntPtr               dwSize,
        MemoryProtection     flNewProtect,
        out MemoryProtection lpflOldProtect);

    //dwSize is a pointer sized value (SIZE_T) on the native side, so it is declared as
    //IntPtr like on VirtualAlloc and VirtualProtect above, not as a 32-bit integer.
    [DllImport("kernel32.dll")]
    private static extern bool VirtualFree(
        IntPtr         lpAddress,
        IntPtr         dwSize,
        AllocationType dwFreeType);

    [DllImport("kernel32.dll")]
    private static extern int GetWriteWatch(
        WriteWatchFlags dwFlags,
        IntPtr          lpBaseAddress,
        IntPtr          dwRegionSize,
        IntPtr[]        lpAddresses,
        ref ulong       lpdwCount,
        out uint        lpdwGranularity);

    //Reserves and commits "size" bytes of read/write memory.
    //Throws OutOfMemoryException when VirtualAlloc returns a null pointer.
    public static IntPtr Allocate(IntPtr size)
    {
        const AllocationType flags =
            AllocationType.Reserve |
            AllocationType.Commit;

        return AllocateInternal(size, flags);
    }

    //Same as Allocate, but the region is created with the WriteWatch flag
    //so GetModifiedPages can be used on it.
    public static IntPtr AllocateWriteTracked(IntPtr size)
    {
        const AllocationType flags =
            AllocationType.Reserve |
            AllocationType.Commit  |
            AllocationType.WriteWatch;

        return AllocateInternal(size, flags);
    }

    //Shared VirtualAlloc call and failure check for both allocation flavors.
    private static IntPtr AllocateInternal(IntPtr size, AllocationType flags)
    {
        IntPtr ptr = VirtualAlloc(IntPtr.Zero, size, flags, MemoryProtection.ReadWrite);

        if (ptr == IntPtr.Zero)
        {
            throw new OutOfMemoryException();
        }

        return ptr;
    }

    //Changes the protection of a range with VirtualProtect, returns true on success.
    public static bool Reprotect(IntPtr address, IntPtr size, Memory.MemoryProtection protection)
    {
        MemoryProtection prot = GetProtection(protection);

        return VirtualProtect(address, size, prot, out _);
    }

    //Converts the platform independent protection value into a Windows page protection constant.
    //Throws ArgumentException for combinations without a mapping below.
    private static MemoryProtection GetProtection(Memory.MemoryProtection protection)
    {
        switch (protection)
        {
            case Memory.MemoryProtection.None:           return MemoryProtection.NoAccess;
            case Memory.MemoryProtection.Read:           return MemoryProtection.ReadOnly;
            case Memory.MemoryProtection.ReadAndWrite:   return MemoryProtection.ReadWrite;
            case Memory.MemoryProtection.ReadAndExecute: return MemoryProtection.ExecuteRead;
            case Memory.MemoryProtection.Execute:        return MemoryProtection.Execute;

            default: throw new ArgumentException($"Invalid permission \"{protection}\".");
        }
    }

    //Releases a region returned by Allocate or AllocateWriteTracked, returns true on success.
    //The size is zero because the Release flag frees the whole region.
    public static bool Free(IntPtr address)
    {
        return VirtualFree(address, IntPtr.Zero, AllocationType.Release);
    }

    //Gets the addresses of the pages written since the last call, and resets the tracking state.
    //"count" receives the number of entries written to "addresses".
    //Returns true when GetWriteWatch succeeds (returns zero).
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool GetModifiedPages(
        IntPtr    address,
        IntPtr    size,
        IntPtr[]  addresses,
        out ulong count)
    {
        //On input this is the capacity of the array, on output the number of entries used.
        ulong pagesCount = (ulong)addresses.Length;

        int result = GetWriteWatch(
            WriteWatchFlags.Reset,
            address,
            size,
            addresses,
            ref pagesCount,
            out _);

        count = pagesCount;

        return result == 0;
    }
}
}

View file

@ -1,16 +1,16 @@
using ChocolArm64.Events; using ChocolArm64.Events;
using ChocolArm64.Exceptions; using ChocolArm64.Exceptions;
using ChocolArm64.Instructions; using ChocolArm64.Instructions;
using ChocolArm64.State;
using System; using System;
using System.Collections.Concurrent; using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
using System.Runtime.Intrinsics; using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86; using System.Runtime.Intrinsics.X86;
using System.Threading; using System.Threading;
using static ChocolArm64.Memory.CompareExchange128;
namespace ChocolArm64.Memory namespace ChocolArm64.Memory
{ {
public unsafe class MemoryManager : IMemory, IDisposable public unsafe class MemoryManager : IMemory, IDisposable
@ -30,21 +30,6 @@ namespace ChocolArm64.Memory
private const int PtLvl0Bit = PageBits + PtLvl1Bits; private const int PtLvl0Bit = PageBits + PtLvl1Bits;
private const int PtLvl1Bit = PageBits; private const int PtLvl1Bit = PageBits;
private const long ErgMask = (4 << CpuThreadState.ErgSizeLog2) - 1;
private class ArmMonitor
{
public long Position;
public bool ExState;
public bool HasExclusiveAccess(long position)
{
return Position == position && ExState;
}
}
private Dictionary<int, ArmMonitor> _monitors;
private ConcurrentDictionary<long, IntPtr> _observedPages; private ConcurrentDictionary<long, IntPtr> _observedPages;
public IntPtr Ram { get; private set; } public IntPtr Ram { get; private set; }
@ -59,8 +44,6 @@ namespace ChocolArm64.Memory
public MemoryManager(IntPtr ram) public MemoryManager(IntPtr ram)
{ {
_monitors = new Dictionary<int, ArmMonitor>();
_observedPages = new ConcurrentDictionary<long, IntPtr>(); _observedPages = new ConcurrentDictionary<long, IntPtr>();
Ram = ram; Ram = ram;
@ -75,104 +58,139 @@ namespace ChocolArm64.Memory
} }
} }
public void RemoveMonitor(int core) internal bool AtomicCompareExchange2xInt32(
long position,
int expectedLow,
int expectedHigh,
int desiredLow,
int desiredHigh)
{ {
lock (_monitors) long expected = (uint)expectedLow;
{ long desired = (uint)desiredLow;
ClearExclusive(core);
_monitors.Remove(core); expected |= (long)expectedHigh << 32;
} desired |= (long)desiredHigh << 32;
return AtomicCompareExchangeInt64(position, expected, desired);
} }
public void SetExclusive(int core, long position) internal bool AtomicCompareExchangeInt128(
long position,
ulong expectedLow,
ulong expectedHigh,
ulong desiredLow,
ulong desiredHigh)
{ {
position &= ~ErgMask; if ((position & 0xf) != 0)
lock (_monitors)
{ {
foreach (ArmMonitor mon in _monitors.Values) AbortWithAlignmentFault(position);
{
if (mon.Position == position && mon.ExState)
{
mon.ExState = false;
}
} }
if (!_monitors.TryGetValue(core, out ArmMonitor threadMon)) IntPtr ptr = new IntPtr(TranslateWrite(position));
return InterlockedCompareExchange128(ptr, expectedLow, expectedHigh, desiredLow, desiredHigh);
}
internal Vector128<float> AtomicReadInt128(long position)
{ {
threadMon = new ArmMonitor(); if ((position & 0xf) != 0)
_monitors.Add(core, threadMon);
}
threadMon.Position = position;
threadMon.ExState = true;
}
}
public bool TestExclusive(int core, long position)
{ {
//Note: Any call to this method also should be followed by a AbortWithAlignmentFault(position);
//call to ClearExclusiveForStore if this method returns true.
position &= ~ErgMask;
Monitor.Enter(_monitors);
if (!_monitors.TryGetValue(core, out ArmMonitor threadMon))
{
Monitor.Exit(_monitors);
return false;
} }
bool exState = threadMon.HasExclusiveAccess(position); IntPtr ptr = new IntPtr(Translate(position));
if (!exState) InterlockedRead128(ptr, out ulong low, out ulong high);
{
Monitor.Exit(_monitors); Vector128<float> vector = default(Vector128<float>);
vector = VectorHelper.VectorInsertInt(low, vector, 0, 3);
vector = VectorHelper.VectorInsertInt(high, vector, 1, 3);
return vector;
} }
return exState; public bool AtomicCompareExchangeByte(long position, byte expected, byte desired)
{
int* ptr = (int*)Translate(position);
int currentValue = *ptr;
int expected32 = (currentValue & ~byte.MaxValue) | expected;
int desired32 = (currentValue & ~byte.MaxValue) | desired;
return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32;
} }
public void ClearExclusiveForStore(int core) public bool AtomicCompareExchangeInt16(long position, short expected, short desired)
{ {
if (_monitors.TryGetValue(core, out ArmMonitor threadMon)) if ((position & 1) != 0)
{ {
threadMon.ExState = false; AbortWithAlignmentFault(position);
} }
Monitor.Exit(_monitors); int* ptr = (int*)Translate(position);
int currentValue = *ptr;
int expected32 = (currentValue & ~ushort.MaxValue) | (ushort)expected;
int desired32 = (currentValue & ~ushort.MaxValue) | (ushort)desired;
return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32;
} }
public void ClearExclusive(int core) public bool AtomicCompareExchangeInt32(long position, int expected, int desired)
{ {
lock (_monitors) if ((position & 3) != 0)
{ {
if (_monitors.TryGetValue(core, out ArmMonitor threadMon)) AbortWithAlignmentFault(position);
{
threadMon.ExState = false;
}
}
} }
public void WriteInt32ToSharedAddr(long position, int value) int* ptr = (int*)TranslateWrite(position);
{
long maskedPosition = position & ~ErgMask;
lock (_monitors) return Interlocked.CompareExchange(ref *ptr, desired, expected) == expected;
{
foreach (ArmMonitor mon in _monitors.Values)
{
if (mon.Position == maskedPosition && mon.ExState)
{
mon.ExState = false;
}
} }
WriteInt32(position, value); public bool AtomicCompareExchangeInt64(long position, long expected, long desired)
{
if ((position & 7) != 0)
{
AbortWithAlignmentFault(position);
} }
long* ptr = (long*)TranslateWrite(position);
return Interlocked.CompareExchange(ref *ptr, desired, expected) == expected;
}
//Atomically adds one to the 32-bit value at "position" and returns the incremented value.
//Addresses that are not a multiple of 4 are rejected through AbortWithAlignmentFault.
public int AtomicIncrementInt32(long position)
{
    bool isAligned = (position & 3) == 0;

    if (!isAligned)
    {
        AbortWithAlignmentFault(position);
    }

    ref int target = ref *(int*)TranslateWrite(position);

    return Interlocked.Increment(ref target);
}
//Atomically subtracts one from the 32-bit value at "position" and returns the decremented value.
//Addresses that are not a multiple of 4 are rejected through AbortWithAlignmentFault.
public int AtomicDecrementInt32(long position)
{
    bool isAligned = (position & 3) == 0;

    if (!isAligned)
    {
        AbortWithAlignmentFault(position);
    }

    ref int target = ref *(int*)TranslateWrite(position);

    return Interlocked.Decrement(ref target);
}
private void AbortWithAlignmentFault(long position)
{
//TODO: Abort mode and exception support on the CPU.
throw new InvalidOperationException($"Tried to compare exchange a misaligned address 0x{position:X16}.");
} }
public sbyte ReadSByte(long position) public sbyte ReadSByte(long position)

View file

@ -0,0 +1,16 @@
using System;
namespace ChocolArm64.Memory
{
//Platform independent memory protection flags.
//Read, Write and Execute are single bits, the remaining members are combinations of them.
[Flags]
public enum MemoryProtection
{
    //No bit set.
    None = 0,
    Read = 1 << 0,
    Write = 1 << 1,
    Execute = 1 << 2,
    //Named combinations of the single bits above.
    ReadAndWrite = Read | Write,
    ReadAndExecute = Read | Execute
}
}

View file

@ -0,0 +1,10 @@
using System;
namespace ChocolArm64.Memory
{
//Exception whose message names the memory protection that could not be applied.
class MemoryProtectionException : Exception
{
    public MemoryProtectionException(MemoryProtection protection)
        : base($"Failed to set memory protection to \"{protection}\".")
    {
    }
}
}

View file

@ -37,7 +37,6 @@ namespace ChocolArm64.State
public int ElrHyp; public int ElrHyp;
public bool Running { get; set; } public bool Running { get; set; }
public int Core { get; set; }
private bool _interrupted; private bool _interrupted;
@ -85,6 +84,16 @@ namespace ChocolArm64.State
internal Translator CurrentTranslator; internal Translator CurrentTranslator;
//Masked address set by SetExclusiveAddress and compared by CheckExclusiveAddress.
//ClearExclusiveAddress resets it to ulong.MaxValue.
private ulong _exclusiveAddress;
//Values recorded by the generated exclusive load code; the generated exclusive store code
//reads them back as the expected values of its compare exchange.
internal ulong ExclusiveValueLow { get; set; }
internal ulong ExclusiveValueHigh { get; set; }
//A new thread state starts with the exclusive address cleared.
public CpuThreadState()
{
ClearExclusiveAddress();
}
static CpuThreadState() static CpuThreadState()
{ {
_hostTickFreq = 1.0 / Stopwatch.Frequency; _hostTickFreq = 1.0 / Stopwatch.Frequency;
@ -94,6 +103,26 @@ namespace ChocolArm64.State
_tickCounter.Start(); _tickCounter.Start();
} }
//Records the masked form of "address" as the current exclusive address.
internal void SetExclusiveAddress(ulong address)
{
    ulong maskedAddress = GetMaskedExclusiveAddress(address);

    _exclusiveAddress = maskedAddress;
}
//Returns true when the masked form of "address" equals the recorded exclusive address.
internal bool CheckExclusiveAddress(ulong address)
{
    ulong maskedAddress = GetMaskedExclusiveAddress(address);

    return _exclusiveAddress == maskedAddress;
}
//Resets the exclusive address to ulong.MaxValue.
internal void ClearExclusiveAddress() => _exclusiveAddress = ulong.MaxValue;
//Clears the low bits of "address", rounding it down to a multiple of (4 << ErgSizeLog2).
//NOTE(review): presumably that value is the exclusive reservation granule size in bytes - confirm.
private ulong GetMaskedExclusiveAddress(ulong address)
{
    ulong granuleSize = 4UL << ErgSizeLog2;
    ulong granuleMask = granuleSize - 1;

    return address & ~granuleMask;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
internal bool Synchronize(int bbWeight) internal bool Synchronize(int bbWeight)
{ {

View file

@ -49,6 +49,7 @@ namespace ChocolArm64.Translation
private const int CmpOptTmp2Index = -4; private const int CmpOptTmp2Index = -4;
private const int VecTmp1Index = -5; private const int VecTmp1Index = -5;
private const int VecTmp2Index = -6; private const int VecTmp2Index = -6;
private const int IntTmp2Index = -7;
public ILEmitterCtx(TranslatorCache cache, TranslatorQueue queue, TranslationTier tier, Block graph) public ILEmitterCtx(TranslatorCache cache, TranslatorQueue queue, TranslationTier tier, Block graph)
{ {
@ -562,6 +563,9 @@ namespace ChocolArm64.Translation
public void EmitLdtmp() => EmitLdint(IntTmpIndex); public void EmitLdtmp() => EmitLdint(IntTmpIndex);
public void EmitSttmp() => EmitStint(IntTmpIndex); public void EmitSttmp() => EmitStint(IntTmpIndex);
//Load/store of the second integer temporary (IntTmp2Index), independent from the one
//used by EmitLdtmp/EmitSttmp.
public void EmitLdtmp2() => EmitLdint(IntTmp2Index);
public void EmitSttmp2() => EmitStint(IntTmp2Index);
public void EmitLdvectmp() => EmitLdvec(VecTmp1Index); public void EmitLdvectmp() => EmitLdvec(VecTmp1Index);
public void EmitStvectmp() => EmitStvec(VecTmp1Index); public void EmitStvectmp() => EmitStvec(VecTmp1Index);
@ -635,6 +639,36 @@ namespace ChocolArm64.Translation
EmitCall(objType.GetMethod($"set_{propName}")); EmitCall(objType.GetMethod($"set_{propName}"));
} }
//Emits a call to the getter ("get_" + propName) of a property on "objType" through EmitPrivateCall.
//Throws ArgumentNullException when "objType" or "propName" is null, checked in that order.
public void EmitCallPrivatePropGet(Type objType, string propName)
{
    _ = objType ?? throw new ArgumentNullException(nameof(objType));
    _ = propName ?? throw new ArgumentNullException(nameof(propName));

    string getterName = $"get_{propName}";

    EmitPrivateCall(objType, getterName);
}
//Emits a call to the setter ("set_" + propName) of a property on "objType" through EmitPrivateCall.
//Throws ArgumentNullException when "objType" or "propName" is null, checked in that order.
public void EmitCallPrivatePropSet(Type objType, string propName)
{
    _ = objType ?? throw new ArgumentNullException(nameof(objType));
    _ = propName ?? throw new ArgumentNullException(nameof(propName));

    string setterName = $"set_{propName}";

    EmitPrivateCall(objType, setterName);
}
public void EmitCall(Type objType, string mthdName) public void EmitCall(Type objType, string mthdName)
{ {
if (objType == null) if (objType == null)

View file

@ -66,7 +66,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common
if (currentProcess.CpuMemory.IsMapped((long)address) && if (currentProcess.CpuMemory.IsMapped((long)address) &&
currentProcess.CpuMemory.IsMapped((long)address + 3)) currentProcess.CpuMemory.IsMapped((long)address + 3))
{ {
currentProcess.CpuMemory.WriteInt32ToSharedAddr((long)address, value); currentProcess.CpuMemory.WriteInt32((long)address, value);
return true; return true;
} }

View file

@ -92,8 +92,6 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
if (coreContext.CurrentThread != null) if (coreContext.CurrentThread != null)
{ {
coreContext.CurrentThread.ClearExclusive();
CoreManager.Set(coreContext.CurrentThread.Context.Work); CoreManager.Set(coreContext.CurrentThread.Context.Work);
coreContext.CurrentThread.Context.Execute(); coreContext.CurrentThread.Context.Execute();

View file

@ -228,43 +228,31 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
KProcess currentProcess = _system.Scheduler.GetCurrentProcess(); KProcess currentProcess = _system.Scheduler.GetCurrentProcess();
currentProcess.CpuMemory.SetExclusive(0, (long)address); int mutexValue, newMutexValue;
if (!KernelTransfer.UserToKernelInt32(_system, address, out int mutexValue)) do
{
if (!KernelTransfer.UserToKernelInt32(_system, address, out mutexValue))
{ {
//Invalid address. //Invalid address.
currentProcess.CpuMemory.ClearExclusive(0);
requester.SignaledObj = null; requester.SignaledObj = null;
requester.ObjSyncResult = KernelResult.InvalidMemState; requester.ObjSyncResult = KernelResult.InvalidMemState;
return null; return null;
} }
while (true)
{
if (currentProcess.CpuMemory.TestExclusive(0, (long)address))
{
if (mutexValue != 0) if (mutexValue != 0)
{ {
//Update value to indicate there is a mutex waiter now. //Update value to indicate there is a mutex waiter now.
currentProcess.CpuMemory.WriteInt32((long)address, mutexValue | HasListenersMask); newMutexValue = mutexValue | HasListenersMask;
} }
else else
{ {
//No thread owning the mutex, assign to requesting thread. //No thread owning the mutex, assign to requesting thread.
currentProcess.CpuMemory.WriteInt32((long)address, requester.ThreadHandleForUserMutex); newMutexValue = requester.ThreadHandleForUserMutex;
} }
currentProcess.CpuMemory.ClearExclusiveForStore(0);
break;
}
currentProcess.CpuMemory.SetExclusive(0, (long)address);
mutexValue = currentProcess.CpuMemory.ReadInt32((long)address);
} }
while (!currentProcess.CpuMemory.AtomicCompareExchangeInt32((long)address, mutexValue, newMutexValue));
if (mutexValue == 0) if (mutexValue == 0)
{ {
@ -392,9 +380,6 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
KProcess currentProcess = _system.Scheduler.GetCurrentProcess(); KProcess currentProcess = _system.Scheduler.GetCurrentProcess();
//If ShouldDecrement is true, do atomic decrement of the value at Address.
currentProcess.CpuMemory.SetExclusive(0, (long)address);
if (!KernelTransfer.UserToKernelInt32(_system, address, out int currentValue)) if (!KernelTransfer.UserToKernelInt32(_system, address, out int currentValue))
{ {
_system.CriticalSection.Leave(); _system.CriticalSection.Leave();
@ -404,25 +389,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
if (shouldDecrement) if (shouldDecrement)
{ {
while (currentValue < value) currentValue = currentProcess.CpuMemory.AtomicDecrementInt32((long)address) + 1;
{
if (currentProcess.CpuMemory.TestExclusive(0, (long)address))
{
currentProcess.CpuMemory.WriteInt32((long)address, currentValue - 1);
currentProcess.CpuMemory.ClearExclusiveForStore(0);
break;
} }
currentProcess.CpuMemory.SetExclusive(0, (long)address);
currentValue = currentProcess.CpuMemory.ReadInt32((long)address);
}
}
currentProcess.CpuMemory.ClearExclusive(0);
if (currentValue < value) if (currentValue < value)
{ {
if (timeout == 0) if (timeout == 0)
@ -511,39 +480,25 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
KProcess currentProcess = _system.Scheduler.GetCurrentProcess(); KProcess currentProcess = _system.Scheduler.GetCurrentProcess();
currentProcess.CpuMemory.SetExclusive(0, (long)address); int currentValue;
if (!KernelTransfer.UserToKernelInt32(_system, address, out int currentValue)) do
{
if (!KernelTransfer.UserToKernelInt32(_system, address, out currentValue))
{ {
_system.CriticalSection.Leave(); _system.CriticalSection.Leave();
return KernelResult.InvalidMemState; return KernelResult.InvalidMemState;
} }
while (currentValue == value)
{
if (currentProcess.CpuMemory.TestExclusive(0, (long)address))
{
currentProcess.CpuMemory.WriteInt32((long)address, currentValue + 1);
currentProcess.CpuMemory.ClearExclusiveForStore(0);
break;
}
currentProcess.CpuMemory.SetExclusive(0, (long)address);
currentValue = currentProcess.CpuMemory.ReadInt32((long)address);
}
currentProcess.CpuMemory.ClearExclusive(0);
if (currentValue != value) if (currentValue != value)
{ {
_system.CriticalSection.Leave(); _system.CriticalSection.Leave();
return KernelResult.InvalidState; return KernelResult.InvalidState;
} }
}
while (!currentProcess.CpuMemory.AtomicCompareExchangeInt32((long)address, currentValue, currentValue + 1));
WakeArbiterThreads(address, count); WakeArbiterThreads(address, count);
@ -582,39 +537,25 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
KProcess currentProcess = _system.Scheduler.GetCurrentProcess(); KProcess currentProcess = _system.Scheduler.GetCurrentProcess();
currentProcess.CpuMemory.SetExclusive(0, (long)address); int currentValue;
if (!KernelTransfer.UserToKernelInt32(_system, address, out int currentValue)) do
{
if (!KernelTransfer.UserToKernelInt32(_system, address, out currentValue))
{ {
_system.CriticalSection.Leave(); _system.CriticalSection.Leave();
return KernelResult.InvalidMemState; return KernelResult.InvalidMemState;
} }
while (currentValue == value)
{
if (currentProcess.CpuMemory.TestExclusive(0, (long)address))
{
currentProcess.CpuMemory.WriteInt32((long)address, currentValue + offset);
currentProcess.CpuMemory.ClearExclusiveForStore(0);
break;
}
currentProcess.CpuMemory.SetExclusive(0, (long)address);
currentValue = currentProcess.CpuMemory.ReadInt32((long)address);
}
currentProcess.CpuMemory.ClearExclusive(0);
if (currentValue != value) if (currentValue != value)
{ {
_system.CriticalSection.Leave(); _system.CriticalSection.Leave();
return KernelResult.InvalidState; return KernelResult.InvalidState;
} }
}
while (!currentProcess.CpuMemory.AtomicCompareExchangeInt32((long)address, currentValue, currentValue + offset));
WakeArbiterThreads(address, count); WakeArbiterThreads(address, count);

View file

@ -70,8 +70,6 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
CurrentThread.TotalTimeRunning += currentTime - CurrentThread.LastScheduledTime; CurrentThread.TotalTimeRunning += currentTime - CurrentThread.LastScheduledTime;
CurrentThread.LastScheduledTime = currentTime; CurrentThread.LastScheduledTime = currentTime;
CurrentThread.ClearExclusive();
_coreManager.Set(CurrentThread.Context.Work); _coreManager.Set(CurrentThread.Context.Work);
CurrentThread.Context.Execute(); CurrentThread.Context.Execute();

View file

@ -1004,11 +1004,6 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
Context.ThreadState.X1 = (ulong)threadHandle; Context.ThreadState.X1 = (ulong)threadHandle;
} }
public void ClearExclusive()
{
Owner.CpuMemory.ClearExclusive(CurrentCore);
}
public void TimeUp() public void TimeUp()
{ {
ReleaseAndResume(); ReleaseAndResume();

View file

@ -36,7 +36,7 @@
"enable_vsync": true, "enable_vsync": true,
// Enable or Disable Multi-core scheduling of threads // Enable or Disable Multi-core scheduling of threads
"enable_multicore_scheduling": false, "enable_multicore_scheduling": true,
// Enable integrity checks on Switch content files // Enable integrity checks on Switch content files
"enable_fs_integrity_checks": true, "enable_fs_integrity_checks": true,

View file

@ -382,7 +382,7 @@
"type": "boolean", "type": "boolean",
"title": "Enable Multicore Scheduling", "title": "Enable Multicore Scheduling",
"description": "Enables or disables multi-core scheduling of threads", "description": "Enables or disables multi-core scheduling of threads",
"default": false, "default": true,
"examples": [ "examples": [
true, true,
false false