diff --git a/src/ARMeilleure/Memory/IJitMemoryBlock.cs b/src/ARMeilleure/Memory/IJitMemoryBlock.cs index cd49f314a..c103fe8d1 100644 --- a/src/ARMeilleure/Memory/IJitMemoryBlock.cs +++ b/src/ARMeilleure/Memory/IJitMemoryBlock.cs @@ -8,6 +8,7 @@ namespace ARMeilleure.Memory void Commit(ulong offset, ulong size); + void MapAsRw(ulong offset, ulong size); void MapAsRx(ulong offset, ulong size); void MapAsRwx(ulong offset, ulong size); } diff --git a/src/Ryujinx.Cpu/Jit/JitCpuContext.cs b/src/Ryujinx.Cpu/Jit/JitCpuContext.cs index 6563d699a..a5944097d 100644 --- a/src/Ryujinx.Cpu/Jit/JitCpuContext.cs +++ b/src/Ryujinx.Cpu/Jit/JitCpuContext.cs @@ -11,7 +11,7 @@ namespace Ryujinx.Cpu.Jit public JitCpuContext(ITickSource tickSource, IMemoryManager memory, bool for64Bit) { _tickSource = tickSource; - _translator = new Translator(new JitMemoryAllocator(), memory, for64Bit); + _translator = new Translator(new JitMemoryAllocator(forJit: true), memory, for64Bit); memory.UnmapEvent += UnmapHandler; } diff --git a/src/Ryujinx.Cpu/Jit/JitMemoryAllocator.cs b/src/Ryujinx.Cpu/Jit/JitMemoryAllocator.cs index 529a1a808..926dd8a0c 100644 --- a/src/Ryujinx.Cpu/Jit/JitMemoryAllocator.cs +++ b/src/Ryujinx.Cpu/Jit/JitMemoryAllocator.cs @@ -5,8 +5,15 @@ namespace Ryujinx.Cpu.Jit { public class JitMemoryAllocator : IJitMemoryAllocator { + private readonly MemoryAllocationFlags _jitFlag; + + public JitMemoryAllocator(bool forJit = false) + { + _jitFlag = forJit ? 
MemoryAllocationFlags.Jit : MemoryAllocationFlags.None; + } + public IJitMemoryBlock Allocate(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.None); - public IJitMemoryBlock Reserve(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.Reserve | MemoryAllocationFlags.Jit); + public IJitMemoryBlock Reserve(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.Reserve | _jitFlag); public ulong GetPageSize() => MemoryBlock.GetPageSize(); } diff --git a/src/Ryujinx.Cpu/Jit/JitMemoryBlock.cs b/src/Ryujinx.Cpu/Jit/JitMemoryBlock.cs index bcacd116a..bd07d349c 100644 --- a/src/Ryujinx.Cpu/Jit/JitMemoryBlock.cs +++ b/src/Ryujinx.Cpu/Jit/JitMemoryBlock.cs @@ -16,6 +16,7 @@ namespace Ryujinx.Cpu.Jit } public void Commit(ulong offset, ulong size) => _impl.Commit(offset, size); + public void MapAsRw(ulong offset, ulong size) => _impl.Reprotect(offset, size, MemoryPermission.ReadAndWrite); public void MapAsRx(ulong offset, ulong size) => _impl.Reprotect(offset, size, MemoryPermission.ReadAndExecute); public void MapAsRwx(ulong offset, ulong size) => _impl.Reprotect(offset, size, MemoryPermission.ReadWriteExecute); diff --git a/src/Ryujinx.Cpu/LightningJit/Cache/CacheMemoryAllocator.cs b/src/Ryujinx.Cpu/LightningJit/Cache/CacheMemoryAllocator.cs index 64583cc3e..3837824f3 100644 --- a/src/Ryujinx.Cpu/LightningJit/Cache/CacheMemoryAllocator.cs +++ b/src/Ryujinx.Cpu/LightningJit/Cache/CacheMemoryAllocator.cs @@ -1,5 +1,7 @@ +using Ryujinx.Cpu.LightningJit.Arm32; using System; using System.Collections.Generic; +using System.Diagnostics; using System.Diagnostics.CodeAnalysis; namespace Ryujinx.Cpu.LightningJit.Cache @@ -38,7 +40,7 @@ namespace Ryujinx.Cpu.LightningJit.Cache if (block.Size > size) { - _blocks[i] = new MemoryBlock(block.Offset + size, block.Size - size); + _blocks[i] = new(block.Offset + size, block.Size - size); return block.Offset; } else if (block.Size == size) @@ -52,6 +54,40 @@ namespace Ryujinx.Cpu.LightningJit.Cache return -1; } + 
public void ForceAllocation(int offset, int size) + { + int index = _blocks.BinarySearch(new(offset, size)); + + if (index < 0) + { + index = ~index; + } + + int endOffset = offset + size; + + MemoryBlock block = _blocks[index]; + + Debug.Assert(block.Offset <= offset && block.Offset + block.Size >= endOffset); + + if (offset > block.Offset && endOffset < block.Offset + block.Size) + { + _blocks[index] = new(block.Offset, offset - block.Offset); + _blocks.Insert(index + 1, new(endOffset, (block.Offset + block.Size) - endOffset)); + } + else if (offset > block.Offset) + { + _blocks[index] = new(block.Offset, offset - block.Offset); + } + else if (endOffset < block.Offset + block.Size) + { + _blocks[index] = new(endOffset, (block.Offset + block.Size) - endOffset); + } + else + { + _blocks.RemoveAt(index); + } + } + public void Free(int offset, int size) { Insert(new MemoryBlock(offset, size)); @@ -92,5 +128,10 @@ namespace Ryujinx.Cpu.LightningJit.Cache _blocks.Insert(index, block); } + + public void Clear() + { + _blocks.Clear(); + } } } diff --git a/src/Ryujinx.Cpu/LightningJit/Cache/JitSupportDarwin.cs b/src/Ryujinx.Cpu/LightningJit/Cache/JitSupportDarwin.cs index 52297b435..06c81045d 100644 --- a/src/Ryujinx.Cpu/LightningJit/Cache/JitSupportDarwin.cs +++ b/src/Ryujinx.Cpu/LightningJit/Cache/JitSupportDarwin.cs @@ -9,5 +9,8 @@ namespace Ryujinx.Cpu.LightningJit.Cache { [LibraryImport("libarmeilleure-jitsupport", EntryPoint = "armeilleure_jit_memcpy")] public static partial void Copy(IntPtr dst, IntPtr src, ulong n); + + [LibraryImport("libc", EntryPoint = "sys_icache_invalidate", SetLastError = true)] + public static partial void SysIcacheInvalidate(IntPtr start, IntPtr len); } } diff --git a/src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs b/src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs new file mode 100644 index 000000000..0a78d7a5f --- /dev/null +++ b/src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs @@ -0,0 +1,340 @@ +using ARMeilleure.Memory; +using 
Ryujinx.Common; +using Ryujinx.Memory; +using System; +using System.Collections.Generic; +using System.Diagnostics; + +namespace Ryujinx.Cpu.LightningJit.Cache +{ + class NoWxCache : IDisposable + { + private const int CodeAlignment = 4; // Bytes. + private const int SharedCacheSize = 2047 * 1024 * 1024; + private const int LocalCacheSize = 256 * 1024 * 1024; + + // How many calls to the same function we allow until we pad the shared cache to force the function to become available there + // and allow the guest to take the fast path. + private const int MinCallsForPad = 8; + + private class MemoryCache : IDisposable + { + private readonly ReservedRegion _region; + private readonly CacheMemoryAllocator _cacheAllocator; + + public CacheMemoryAllocator Allocator => _cacheAllocator; + public IntPtr Pointer => _region.Block.Pointer; + + public MemoryCache(IJitMemoryAllocator allocator, ulong size) + { + _region = new(allocator, size); + _cacheAllocator = new((int)size); + } + + public int Allocate(int codeSize) + { + codeSize = AlignCodeSize(codeSize); + + int allocOffset = _cacheAllocator.Allocate(codeSize); + + if (allocOffset < 0) + { + throw new OutOfMemoryException("JIT Cache exhausted."); + } + + _region.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize); + + return allocOffset; + } + + public void Free(int offset, int size) + { + _cacheAllocator.Free(offset, size); + } + + public void ReprotectAsRw(int offset, int size) + { + Debug.Assert(offset >= 0 && (offset & (int)(MemoryBlock.GetPageSize() - 1)) == 0); + Debug.Assert(size > 0 && (size & (int)(MemoryBlock.GetPageSize() - 1)) == 0); + + _region.Block.MapAsRw((ulong)offset, (ulong)size); + } + + public void ReprotectAsRx(int offset, int size) + { + Debug.Assert(offset >= 0 && (offset & (int)(MemoryBlock.GetPageSize() - 1)) == 0); + Debug.Assert(size > 0 && (size & (int)(MemoryBlock.GetPageSize() - 1)) == 0); + + _region.Block.MapAsRx((ulong)offset, (ulong)size); + + if (OperatingSystem.IsMacOS() || 
OperatingSystem.IsIOS())
+                {
+                    JitSupportDarwin.SysIcacheInvalidate(_region.Block.Pointer + offset, size);
+                }
+                else
+                {
+                    throw new PlatformNotSupportedException();
+                }
+            }
+
+            private static int AlignCodeSize(int codeSize)
+            {
+                return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1);
+            }
+
+            protected virtual void Dispose(bool disposing)
+            {
+                if (disposing)
+                {
+                    _region.Dispose();
+                    _cacheAllocator.Clear();
+                }
+            }
+
+            public void Dispose()
+            {
+                // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
+                Dispose(disposing: true);
+                GC.SuppressFinalize(this);
+            }
+        }
+
+        private readonly IStackWalker _stackWalker;
+        private readonly Translator _translator;
+        private readonly MemoryCache _sharedCache;
+        private readonly MemoryCache _localCache;
+        private readonly PageAlignedRangeList _pendingMap;
+        private readonly object _lock;
+
+        class ThreadLocalCacheEntry
+        {
+            public readonly int Offset;
+            public readonly int Size;
+            public readonly IntPtr FuncPtr;
+            private int _useCount;
+
+            public ThreadLocalCacheEntry(int offset, int size, IntPtr funcPtr)
+            {
+                Offset = offset;
+                Size = size;
+                FuncPtr = funcPtr;
+                _useCount = 0;
+            }
+
+            public int IncrementUseCount()
+            {
+                return ++_useCount;
+            }
+        }
+
+        [ThreadStatic]
+        private static Dictionary<ulong, ThreadLocalCacheEntry> _threadLocalCache;
+
+        public NoWxCache(IJitMemoryAllocator allocator, IStackWalker stackWalker, Translator translator)
+        {
+            _stackWalker = stackWalker;
+            _translator = translator;
+            _sharedCache = new(allocator, SharedCacheSize);
+            _localCache = new(allocator, LocalCacheSize);
+            _pendingMap = new(_sharedCache.ReprotectAsRx, RegisterFunction);
+            _lock = new();
+        }
+
+        public unsafe IntPtr Map(IntPtr framePointer, ReadOnlySpan<byte> code, ulong guestAddress, ulong guestSize)
+        {
+            if (TryGetThreadLocalFunction(guestAddress, out IntPtr funcPtr))
+            {
+                return funcPtr;
+            }
+
+            lock (_lock)
+            {
+                if (!_pendingMap.Has(guestAddress))
+                {
+                    int funcOffset = _sharedCache.Allocate(code.Length);
+
+                    
funcPtr = _sharedCache.Pointer + funcOffset;
+                    code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));
+
+                    TranslatedFunction function = new(funcPtr, guestSize);
+
+                    _pendingMap.Add(funcOffset, code.Length, guestAddress, function);
+                }
+
+                ClearThreadLocalCache(framePointer);
+
+                return AddThreadLocalFunction(code, guestAddress);
+            }
+        }
+
+        public unsafe IntPtr MapPageAligned(ReadOnlySpan<byte> code)
+        {
+            lock (_lock)
+            {
+                // Ensure we will get an aligned offset from the allocator.
+                _pendingMap.Pad(_sharedCache.Allocator);
+
+                int sizeAligned = BitUtils.AlignUp(code.Length, (int)MemoryBlock.GetPageSize());
+                int funcOffset = _sharedCache.Allocate(sizeAligned);
+
+                Debug.Assert((funcOffset & ((int)MemoryBlock.GetPageSize() - 1)) == 0);
+
+                IntPtr funcPtr = _sharedCache.Pointer + funcOffset;
+                code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));
+
+                _sharedCache.ReprotectAsRx(funcOffset, sizeAligned);
+
+                return funcPtr;
+            }
+        }
+
+        private bool TryGetThreadLocalFunction(ulong guestAddress, out IntPtr funcPtr)
+        {
+            if ((_threadLocalCache ??= new()).TryGetValue(guestAddress, out var entry))
+            {
+                if (entry.IncrementUseCount() >= MinCallsForPad)
+                {
+                    // Function is being called often, let's make it available in the shared cache so that the guest code
+                    // can take the fast path and stop calling the emulator to get the function from the thread local cache.
+                    // To do that we pad all "pending" function until they complete a page of memory, allowing us to reprotect them as RX.
+
+                    lock (_lock)
+                    {
+                        _pendingMap.Pad(_sharedCache.Allocator);
+                    }
+                }
+
+                funcPtr = entry.FuncPtr;
+
+                return true;
+            }
+
+            funcPtr = IntPtr.Zero;
+
+            return false;
+        }
+
+        private void ClearThreadLocalCache(IntPtr framePointer)
+        {
+            // Try to delete functions that are already on the shared cache
+            // and no longer being executed.
+
+            if (_threadLocalCache == null)
+            {
+                return;
+            }
+
+            IEnumerable<ulong> callStack = _stackWalker.GetCallStack(
+                framePointer,
+                _localCache.Pointer,
+                LocalCacheSize,
+                _sharedCache.Pointer,
+                SharedCacheSize);
+
+            List<(ulong, ThreadLocalCacheEntry)> toDelete = new();
+
+            foreach ((ulong address, ThreadLocalCacheEntry entry) in _threadLocalCache)
+            {
+                // We only want to delete if the function is already on the shared cache,
+                // otherwise we will keep translating the same function over and over again.
+                bool canDelete = !_pendingMap.Has(address);
+                if (!canDelete)
+                {
+                    continue;
+                }
+
+                // We can only delete if the function is not part of the current thread call stack,
+                // otherwise we will crash the program when the thread returns to it.
+                foreach (ulong funcAddress in callStack)
+                {
+                    if (funcAddress >= (ulong)entry.FuncPtr && funcAddress < (ulong)entry.FuncPtr + (ulong)entry.Size)
+                    {
+                        canDelete = false;
+                        break;
+                    }
+                }
+
+                if (canDelete)
+                {
+                    toDelete.Add((address, entry));
+                }
+            }
+
+            int pageSize = (int)MemoryBlock.GetPageSize();
+
+            foreach ((ulong address, ThreadLocalCacheEntry entry) in toDelete)
+            {
+                _threadLocalCache.Remove(address);
+
+                int sizeAligned = BitUtils.AlignUp(entry.Size, pageSize);
+
+                _localCache.Free(entry.Offset, sizeAligned);
+                _localCache.ReprotectAsRw(entry.Offset, sizeAligned);
+            }
+        }
+
+        public void ClearEntireThreadLocalCache()
+        {
+            // Thread is exiting, delete everything.
+
+            if (_threadLocalCache == null)
+            {
+                return;
+            }
+
+            int pageSize = (int)MemoryBlock.GetPageSize();
+
+            foreach ((_, ThreadLocalCacheEntry entry) in _threadLocalCache)
+            {
+                int sizeAligned = BitUtils.AlignUp(entry.Size, pageSize);
+
+                _localCache.Free(entry.Offset, sizeAligned);
+                _localCache.ReprotectAsRw(entry.Offset, sizeAligned);
+            }
+
+            _threadLocalCache.Clear();
+            _threadLocalCache = null;
+        }
+
+        private unsafe IntPtr AddThreadLocalFunction(ReadOnlySpan<byte> code, ulong guestAddress)
+        {
+            int alignedSize = BitUtils.AlignUp(code.Length, (int)MemoryBlock.GetPageSize());
+            int funcOffset = _localCache.Allocate(alignedSize);
+
+            Debug.Assert((funcOffset & (int)(MemoryBlock.GetPageSize() - 1)) == 0);
+
+            IntPtr funcPtr = _localCache.Pointer + funcOffset;
+            code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));
+
+            (_threadLocalCache ??= new()).Add(guestAddress, new(funcOffset, code.Length, funcPtr));
+
+            _localCache.ReprotectAsRx(funcOffset, alignedSize);
+
+            return funcPtr;
+        }
+
+        private void RegisterFunction(ulong address, TranslatedFunction func)
+        {
+            TranslatedFunction oldFunc = _translator.Functions.GetOrAdd(address, func.GuestSize, func);
+
+            Debug.Assert(oldFunc == func);
+
+            _translator.RegisterFunction(address, func);
+        }
+
+        protected virtual void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                _localCache.Dispose();
+                _sharedCache.Dispose();
+            }
+        }
+
+        public void Dispose()
+        {
+            Dispose(disposing: true);
+            GC.SuppressFinalize(this);
+        }
+    }
+}
diff --git a/src/Ryujinx.Cpu/LightningJit/Cache/PageAlignedRangeList.cs b/src/Ryujinx.Cpu/LightningJit/Cache/PageAlignedRangeList.cs
new file mode 100644
index 000000000..b6b386714
--- /dev/null
+++ b/src/Ryujinx.Cpu/LightningJit/Cache/PageAlignedRangeList.cs
@@ -0,0 +1,218 @@
+using Ryujinx.Common;
+using Ryujinx.Memory;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+
+namespace Ryujinx.Cpu.LightningJit.Cache
+{
+    class 
PageAlignedRangeList
+    {
+        private readonly struct Range : IComparable<Range>
+        {
+            public int Offset { get; }
+            public int Size { get; }
+
+            public Range(int offset, int size)
+            {
+                Offset = offset;
+                Size = size;
+            }
+
+            public int CompareTo([AllowNull] Range other)
+            {
+                return Offset.CompareTo(other.Offset);
+            }
+        }
+
+        private readonly Action<int, int> _alignedRangeAction;
+        private readonly Action<ulong, TranslatedFunction> _alignedFunctionAction;
+        private readonly List<(Range, ulong, TranslatedFunction)> _pendingFunctions;
+        private readonly List<Range> _ranges;
+
+        public PageAlignedRangeList(Action<int, int> alignedRangeAction, Action<ulong, TranslatedFunction> alignedFunctionAction)
+        {
+            _alignedRangeAction = alignedRangeAction;
+            _alignedFunctionAction = alignedFunctionAction;
+            _pendingFunctions = new();
+            _ranges = new();
+        }
+
+        public bool Has(ulong address)
+        {
+            foreach ((_, ulong guestAddress, _) in _pendingFunctions)
+            {
+                if (guestAddress == address)
+                {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        public void Add(int offset, int size, ulong address, TranslatedFunction function)
+        {
+            Range range = new(offset, size);
+
+            Insert(range);
+            _pendingFunctions.Add((range, address, function));
+            ProcessAlignedRanges();
+        }
+
+        public void Pad(CacheMemoryAllocator allocator)
+        {
+            int pageSize = (int)MemoryBlock.GetPageSize();
+
+            for (int index = 0; index < _ranges.Count; index++)
+            {
+                Range range = _ranges[index];
+
+                int endOffset = range.Offset + range.Size;
+
+                int alignedStart = BitUtils.AlignDown(range.Offset, pageSize);
+                int alignedEnd = BitUtils.AlignUp(endOffset, pageSize);
+                int alignedSize = alignedEnd - alignedStart;
+
+                if (alignedStart < range.Offset)
+                {
+                    allocator.ForceAllocation(alignedStart, range.Offset - alignedStart);
+                }
+
+                if (alignedEnd > endOffset)
+                {
+                    allocator.ForceAllocation(endOffset, alignedEnd - endOffset);
+                }
+
+                _alignedRangeAction(alignedStart, alignedSize);
+                _ranges.RemoveAt(index--);
+                ProcessPendingFunctions(index, alignedEnd);
+            }
+        }
+
+        private void ProcessAlignedRanges()
+        {
+            int 
pageSize = (int)MemoryBlock.GetPageSize(); + + for (int index = 0; index < _ranges.Count; index++) + { + Range range = _ranges[index]; + + int alignedStart = BitUtils.AlignUp(range.Offset, pageSize); + int alignedEnd = BitUtils.AlignDown(range.Offset + range.Size, pageSize); + int alignedSize = alignedEnd - alignedStart; + + if (alignedSize <= 0) + { + continue; + } + + _alignedRangeAction(alignedStart, alignedSize); + SplitAt(ref index, alignedStart, alignedEnd); + ProcessPendingFunctions(index, alignedEnd); + } + } + + private void ProcessPendingFunctions(int rangeIndex, int alignedEnd) + { + if ((rangeIndex > 0 && rangeIndex == _ranges.Count) || + (rangeIndex >= 0 && rangeIndex < _ranges.Count && _ranges[rangeIndex].Offset >= alignedEnd)) + { + rangeIndex--; + } + + int alignedStart; + + if (rangeIndex >= 0) + { + alignedStart = _ranges[rangeIndex].Offset + _ranges[rangeIndex].Size; + } + else + { + alignedStart = 0; + } + + if (rangeIndex < _ranges.Count - 1) + { + alignedEnd = _ranges[rangeIndex + 1].Offset; + } + else + { + alignedEnd = int.MaxValue; + } + + for (int index = 0; index < _pendingFunctions.Count; index++) + { + (Range range, ulong address, TranslatedFunction function) = _pendingFunctions[index]; + + if (range.Offset >= alignedStart && range.Offset + range.Size <= alignedEnd) + { + _alignedFunctionAction(address, function); + _pendingFunctions.RemoveAt(index--); + } + } + } + + private void Insert(Range range) + { + int index = _ranges.BinarySearch(range); + + if (index < 0) + { + index = ~index; + } + + if (index < _ranges.Count) + { + Range next = _ranges[index]; + + int endOffs = range.Offset + range.Size; + + if (next.Offset == endOffs) + { + range = new Range(range.Offset, range.Size + next.Size); + _ranges.RemoveAt(index); + } + } + + if (index > 0) + { + Range prev = _ranges[index - 1]; + + if (prev.Offset + prev.Size == range.Offset) + { + range = new Range(range.Offset - prev.Size, range.Size + prev.Size); + _ranges.RemoveAt(--index); + 
}
+            }
+
+            _ranges.Insert(index, range);
+        }
+
+        private void SplitAt(ref int index, int alignedStart, int alignedEnd)
+        {
+            Range range = _ranges[index];
+
+            if (range.Offset < alignedStart)
+            {
+                _ranges[index++] = new(range.Offset, alignedStart - range.Offset);
+
+                if (range.Offset + range.Size > alignedEnd)
+                {
+                    _ranges.Insert(index, new(alignedEnd, (range.Offset + range.Size) - alignedEnd));
+                }
+            }
+            else if (range.Offset + range.Size > alignedEnd)
+            {
+                _ranges[index] = new(alignedEnd, (range.Offset + range.Size) - alignedEnd);
+            }
+            else if (range.Offset == alignedStart && range.Offset + range.Size == alignedEnd)
+            {
+                Debug.Assert(range.Offset == alignedStart && range.Offset + range.Size == alignedEnd);
+
+                _ranges.RemoveAt(index--);
+            }
+        }
+    }
+}
diff --git a/src/Ryujinx.Cpu/LightningJit/CodeGen/Arm64/StackWalker.cs b/src/Ryujinx.Cpu/LightningJit/CodeGen/Arm64/StackWalker.cs
new file mode 100644
index 000000000..ffed56a65
--- /dev/null
+++ b/src/Ryujinx.Cpu/LightningJit/CodeGen/Arm64/StackWalker.cs
@@ -0,0 +1,30 @@
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Cpu.LightningJit.CodeGen.Arm64
+{
+    class StackWalker : IStackWalker
+    {
+        public IEnumerable<ulong> GetCallStack(IntPtr framePointer, IntPtr codeRegionStart, int codeRegionSize, IntPtr codeRegion2Start, int codeRegion2Size)
+        {
+            List<ulong> functionPointers = new();
+
+            while (true)
+            {
+                IntPtr functionPointer = Marshal.ReadIntPtr(framePointer, IntPtr.Size);
+
+                if ((functionPointer < codeRegionStart || functionPointer >= codeRegionStart + codeRegionSize) &&
+                    (functionPointer < codeRegion2Start || functionPointer >= codeRegion2Start + codeRegion2Size))
+                {
+                    break;
+                }
+
+                functionPointers.Add((ulong)functionPointer - 4);
+                framePointer = Marshal.ReadIntPtr(framePointer);
+            }
+
+            return functionPointers;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Ryujinx.Cpu/LightningJit/IStackWalker.cs b/src/Ryujinx.Cpu/LightningJit/IStackWalker.cs 
new file mode 100644
index 000000000..d330ef788
--- /dev/null
+++ b/src/Ryujinx.Cpu/LightningJit/IStackWalker.cs
@@ -0,0 +1,10 @@
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Cpu.LightningJit
+{
+    interface IStackWalker
+    {
+        IEnumerable<ulong> GetCallStack(IntPtr framePointer, IntPtr codeRegionStart, int codeRegionSize, IntPtr codeRegion2Start, int codeRegion2Size);
+    }
+}
\ No newline at end of file
diff --git a/src/Ryujinx.Cpu/LightningJit/LightningJitCpuContext.cs b/src/Ryujinx.Cpu/LightningJit/LightningJitCpuContext.cs
index efe60c018..0f07abc25 100644
--- a/src/Ryujinx.Cpu/LightningJit/LightningJitCpuContext.cs
+++ b/src/Ryujinx.Cpu/LightningJit/LightningJitCpuContext.cs
@@ -1,4 +1,4 @@
-using ARMeilleure.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Cpu.Jit;
 using Ryujinx.Cpu.LightningJit.State;
 
@@ -12,7 +12,7 @@ namespace Ryujinx.Cpu.LightningJit
         public LightningJitCpuContext(ITickSource tickSource, IMemoryManager memory, bool for64Bit)
         {
             _tickSource = tickSource;
-            _translator = new Translator(new JitMemoryAllocator(), memory, for64Bit);
+            _translator = new Translator(new JitMemoryAllocator(forJit: true), memory, for64Bit);
 
             memory.UnmapEvent += UnmapHandler;
         }
diff --git a/src/Ryujinx.Cpu/LightningJit/NativeInterface.cs b/src/Ryujinx.Cpu/LightningJit/NativeInterface.cs
index d67a45d4c..da3ad9832 100644
--- a/src/Ryujinx.Cpu/LightningJit/NativeInterface.cs
+++ b/src/Ryujinx.Cpu/LightningJit/NativeInterface.cs
@@ -61,11 +61,9 @@ namespace Ryujinx.Cpu.LightningJit
             return GetContext().CntpctEl0;
         }
 
-        public static ulong GetFunctionAddress(ulong address)
+        public static ulong GetFunctionAddress(IntPtr framePointer, ulong address)
         {
-            TranslatedFunction function = Context.Translator.GetOrTranslate(address, GetContext().ExecutionMode);
-
-            return (ulong)function.FuncPointer.ToInt64();
+            return (ulong)Context.Translator.GetOrTranslatePointer(framePointer, address, GetContext().ExecutionMode);
         }
 
         public static void InvalidateCacheLine(ulong 
address)
diff --git a/src/Ryujinx.Cpu/LightningJit/Translator.cs b/src/Ryujinx.Cpu/LightningJit/Translator.cs
index fc5df0a8e..a1ff448de 100644
--- a/src/Ryujinx.Cpu/LightningJit/Translator.cs
+++ b/src/Ryujinx.Cpu/LightningJit/Translator.cs
@@ -1,7 +1,9 @@
 using ARMeilleure.Common;
 using ARMeilleure.Memory;
 using ARMeilleure.Signal;
+using Ryujinx.Cpu.Jit;
 using Ryujinx.Cpu.LightningJit.Cache;
+using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
 using Ryujinx.Cpu.LightningJit.State;
 using System;
 using System.Collections.Concurrent;
@@ -13,6 +15,9 @@ namespace Ryujinx.Cpu.LightningJit
 {
     class Translator : IDisposable
     {
+        // Should be enabled on platforms that enforce W^X.
+        private static bool IsNoWxPlatform => false;
+
         private static readonly AddressTable<ulong>.Level[] _levels64Bit =
             new AddressTable<ulong>.Level[]
             {
@@ -33,6 +38,7 @@ namespace Ryujinx.Cpu.LightningJit
         };
 
         private readonly ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>> _oldFuncs;
+        private readonly NoWxCache _noWxCache;
         private bool _disposed;
 
         internal TranslatorCache<TranslatedFunction> Functions { get; }
@@ -46,12 +52,20 @@ namespace Ryujinx.Cpu.LightningJit
 
             _oldFuncs = new ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>>();
 
-            JitCache.Initialize(allocator);
+            if (IsNoWxPlatform)
+            {
+                _noWxCache = new(new JitMemoryAllocator(), CreateStackWalker(), this);
+            }
+            else
+            {
+                JitCache.Initialize(allocator);
+            }
+
             NativeSignalHandler.Initialize(allocator);
 
             Functions = new TranslatorCache<TranslatedFunction>();
             FunctionTable = new AddressTable<ulong>(for64Bits ? 
_levels64Bit : _levels32Bit); - Stubs = new TranslatorStubs(FunctionTable); + Stubs = new TranslatorStubs(FunctionTable, _noWxCache); FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub; @@ -61,6 +75,18 @@ namespace Ryujinx.Cpu.LightningJit } } + private static IStackWalker CreateStackWalker() + { + if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64) + { + return new StackWalker(); + } + else + { + throw new PlatformNotSupportedException(); + } + } + public void Execute(State.ExecutionContext context, ulong address) { ObjectDisposedException.ThrowIf(_disposed, this); @@ -70,9 +96,22 @@ namespace Ryujinx.Cpu.LightningJit Stubs.DispatchLoop(context.NativeContextPtr, address); NativeInterface.UnregisterThread(); + _noWxCache?.ClearEntireThreadLocalCache(); } - internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode) + internal IntPtr GetOrTranslatePointer(IntPtr framePointer, ulong address, ExecutionMode mode) + { + if (_noWxCache != null) + { + CompiledFunction func = Compile(address, mode); + + return _noWxCache.Map(framePointer, func.Code, address, (ulong)func.GuestCodeLength); + } + + return GetOrTranslate(address, mode).FuncPointer; + } + + private TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode) { if (!Functions.TryGetValue(address, out TranslatedFunction func)) { @@ -86,7 +125,6 @@ namespace Ryujinx.Cpu.LightningJit func = oldFunc; } - RegisterFunction(address, func); } @@ -103,13 +141,17 @@ namespace Ryujinx.Cpu.LightningJit internal TranslatedFunction Translate(ulong address, ExecutionMode mode) { - CompiledFunction func = AarchCompiler.Compile(CpuPresets.CortexA57, Memory, address, FunctionTable, Stubs.DispatchStub, mode, RuntimeInformation.ProcessArchitecture); - + CompiledFunction func = Compile(address, mode); IntPtr funcPointer = JitCache.Map(func.Code); return new TranslatedFunction(funcPointer, (ulong)func.GuestCodeLength); } + internal CompiledFunction Compile(ulong address, ExecutionMode mode) + { 
+ return AarchCompiler.Compile(CpuPresets.CortexA57, Memory, address, FunctionTable, Stubs.DispatchStub, mode, RuntimeInformation.ProcessArchitecture); + } + public void InvalidateJitCacheRegion(ulong address, ulong size) { ulong[] overlapAddresses = Array.Empty(); @@ -160,7 +202,14 @@ namespace Ryujinx.Cpu.LightningJit { if (disposing) { - ClearJitCache(); + if (_noWxCache != null) + { + _noWxCache.Dispose(); + } + else + { + ClearJitCache(); + } Stubs.Dispose(); FunctionTable.Dispose(); diff --git a/src/Ryujinx.Cpu/LightningJit/TranslatorStubs.cs b/src/Ryujinx.Cpu/LightningJit/TranslatorStubs.cs index 3a0b78982..914712bb1 100644 --- a/src/Ryujinx.Cpu/LightningJit/TranslatorStubs.cs +++ b/src/Ryujinx.Cpu/LightningJit/TranslatorStubs.cs @@ -17,13 +17,14 @@ namespace Ryujinx.Cpu.LightningJit /// class TranslatorStubs : IDisposable { - private delegate ulong GetFunctionAddressDelegate(ulong address); + private delegate ulong GetFunctionAddressDelegate(IntPtr framePointer, ulong address); private readonly Lazy _slowDispatchStub; private bool _disposed; private readonly AddressTable _functionTable; + private readonly NoWxCache _noWxCache; private readonly GetFunctionAddressDelegate _getFunctionAddressRef; private readonly IntPtr _getFunctionAddress; private readonly Lazy _dispatchStub; @@ -76,12 +77,14 @@ namespace Ryujinx.Cpu.LightningJit /// instance. 
/// /// Function table used to store pointers to the functions that the guest code will call + /// Cache used on platforms that enforce W^X, otherwise should be null /// is null - public TranslatorStubs(AddressTable functionTable) + public TranslatorStubs(AddressTable functionTable, NoWxCache noWxCache) { ArgumentNullException.ThrowIfNull(functionTable); _functionTable = functionTable; + _noWxCache = noWxCache; _getFunctionAddressRef = NativeInterface.GetFunctionAddress; _getFunctionAddress = Marshal.GetFunctionPointerForDelegate(_getFunctionAddressRef); _slowDispatchStub = new(GenerateSlowDispatchStub, isThreadSafe: true); @@ -106,14 +109,17 @@ namespace Ryujinx.Cpu.LightningJit { if (!_disposed) { - if (_dispatchStub.IsValueCreated) + if (_noWxCache == null) { - JitCache.Unmap(_dispatchStub.Value); - } + if (_dispatchStub.IsValueCreated) + { + JitCache.Unmap(_dispatchStub.Value); + } - if (_dispatchLoop.IsValueCreated) - { - JitCache.Unmap(Marshal.GetFunctionPointerForDelegate(_dispatchLoop.Value)); + if (_dispatchLoop.IsValueCreated) + { + JitCache.Unmap(Marshal.GetFunctionPointerForDelegate(_dispatchLoop.Value)); + } } _disposed = true; @@ -197,7 +203,8 @@ namespace Ryujinx.Cpu.LightningJit } // Fallback. - asm.Mov(Register(0), guestAddress); + asm.Mov(Register(0), Register(29)); + asm.Mov(Register(1), guestAddress); asm.Mov(Register(16), (ulong)_getFunctionAddress); asm.Blr(Register(16)); asm.Mov(Register(16), Register(0)); @@ -212,7 +219,7 @@ namespace Ryujinx.Cpu.LightningJit throw new PlatformNotSupportedException(); } - return JitCache.Map(writer.AsByteSpan()); + return Map(writer.AsByteSpan()); } /// @@ -234,7 +241,8 @@ namespace Ryujinx.Cpu.LightningJit asm.Mov(context, Register(0)); // Load the target guest address from the native context. 
- asm.LdrRiUn(Register(0), context, NativeContext.GetDispatchAddressOffset()); + asm.Mov(Register(0), Register(29)); + asm.LdrRiUn(Register(1), context, NativeContext.GetDispatchAddressOffset()); asm.Mov(Register(16), (ulong)_getFunctionAddress); asm.Blr(Register(16)); asm.Mov(Register(16), Register(0)); @@ -249,7 +257,7 @@ namespace Ryujinx.Cpu.LightningJit throw new PlatformNotSupportedException(); } - return JitCache.Map(writer.AsByteSpan()); + return Map(writer.AsByteSpan()); } /// @@ -312,7 +320,7 @@ namespace Ryujinx.Cpu.LightningJit Operand context = Register(19); asm.Mov(context, Register(0)); - EmitSyncFpContext(ref asm, context, Register(16), Register(17), true); + EmitSyncFpContext(ref asm, context, Register(16, OperandType.I32), Register(17, OperandType.I32), true); // Load the target guest address from the native context. Operand guestAddress = Register(16); @@ -331,7 +339,7 @@ namespace Ryujinx.Cpu.LightningJit asm.Cbz(Register(17), 8); asm.B((loopStartIndex - writer.InstructionPointer) * 4); - EmitSyncFpContext(ref asm, context, Register(16), Register(17), false); + EmitSyncFpContext(ref asm, context, Register(16, OperandType.I32), Register(17, OperandType.I32), false); rsr.WriteEpilogue(ref asm); @@ -342,11 +350,23 @@ namespace Ryujinx.Cpu.LightningJit throw new PlatformNotSupportedException(); } - IntPtr pointer = JitCache.Map(writer.AsByteSpan()); + IntPtr pointer = Map(writer.AsByteSpan()); return Marshal.GetDelegateForFunctionPointer(pointer); } + private IntPtr Map(ReadOnlySpan code) + { + if (_noWxCache != null) + { + return _noWxCache.MapPageAligned(code); + } + else + { + return JitCache.Map(code); + } + } + private static Operand Register(int register, OperandType type = OperandType.I64) { return new Operand(register, RegisterType.Integer, type);