From 2e2d26d49dc7978355f22c82eedface1acf895b3 Mon Sep 17 00:00:00 2001 From: VocalFan <45863583+FluffyOMC@users.noreply.github.com> Date: Mon, 3 Feb 2025 02:05:40 -0500 Subject: [PATCH 1/5] Cursed thing that fixes JIT Cache exhaustion --- src/ARMeilleure/Memory/ReservedRegion.cs | 2 + src/ARMeilleure/Translation/Cache/JitCache.cs | 110 ++++++++++++------ .../Sockets/Bsd/Impl/WinSockHelper.cs | 1 + 3 files changed, 79 insertions(+), 34 deletions(-) diff --git a/src/ARMeilleure/Memory/ReservedRegion.cs b/src/ARMeilleure/Memory/ReservedRegion.cs index a3ebd610d..dfe17c933 100644 --- a/src/ARMeilleure/Memory/ReservedRegion.cs +++ b/src/ARMeilleure/Memory/ReservedRegion.cs @@ -7,6 +7,7 @@ namespace ARMeilleure.Memory public const int DefaultGranularity = 65536; // Mapping granularity in Windows. public IJitMemoryBlock Block { get; } + public IJitMemoryAllocator Allocator { get; } public nint Pointer => Block.Pointer; @@ -21,6 +22,7 @@ namespace ARMeilleure.Memory granularity = DefaultGranularity; } + Allocator = allocator; Block = allocator.Reserve(maxSize); _maxSize = maxSize; _sizeGranularity = granularity; diff --git a/src/ARMeilleure/Translation/Cache/JitCache.cs b/src/ARMeilleure/Translation/Cache/JitCache.cs index d7e8201d8..8a40b6b63 100644 --- a/src/ARMeilleure/Translation/Cache/JitCache.cs +++ b/src/ARMeilleure/Translation/Cache/JitCache.cs @@ -2,6 +2,7 @@ using ARMeilleure.CodeGen; using ARMeilleure.CodeGen.Unwinding; using ARMeilleure.Memory; using ARMeilleure.Native; +using Ryujinx.Common.Logging; using Ryujinx.Memory; using System; using System.Collections.Generic; @@ -18,7 +19,7 @@ namespace ARMeilleure.Translation.Cache private static readonly int _pageMask = _pageSize - 1; private const int CodeAlignment = 4; // Bytes. - private const int CacheSize = 2047 * 1024 * 1024; + private const int CacheSize = 256 * 1024 * 1024; private static ReservedRegion _jitRegion; private static JitCacheInvalidation _jitCacheInvalidator; @@ -30,6 +31,9 @@ namespace ARMeilleure.Translation.Cache private static readonly Lock _lock = new(); private static bool _initialized; + private static readonly List _jitRegions = new(); + private static int _activeRegionIndex = 0; + [SupportedOSPlatform("windows")] [LibraryImport("kernel32.dll", SetLastError = true)] public static partial nint FlushInstructionCache(nint hProcess, nint lpAddress, nuint dwSize); @@ -48,7 +52,9 @@ namespace ARMeilleure.Translation.Cache return; } - _jitRegion = new ReservedRegion(allocator, CacheSize); + var firstRegion = new ReservedRegion(allocator, CacheSize); + _jitRegions.Add(firstRegion); + _activeRegionIndex = 0; if (!OperatingSystem.IsWindows() && !OperatingSystem.IsMacOS()) { @@ -59,7 +65,9 @@ namespace ARMeilleure.Translation.Cache if (OperatingSystem.IsWindows()) { - JitUnwindWindows.InstallFunctionTableHandler(_jitRegion.Pointer, CacheSize, _jitRegion.Pointer + Allocate(_pageSize)); + JitUnwindWindows.InstallFunctionTableHandler( + firstRegion.Pointer, CacheSize, firstRegion.Pointer + Allocate(_pageSize) + ); } _initialized = true; @@ -75,8 +83,8 @@ namespace ARMeilleure.Translation.Cache Debug.Assert(_initialized); int funcOffset = Allocate(code.Length); - - nint funcPtr = _jitRegion.Pointer + funcOffset; + ReservedRegion targetRegion = _jitRegions[_activeRegionIndex]; + nint funcPtr = targetRegion.Pointer + funcOffset; if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) { @@ -90,9 +98,9 @@ namespace ARMeilleure.Translation.Cache } else { - ReprotectAsWritable(funcOffset, code.Length); + ReprotectAsWritable(targetRegion, funcOffset, code.Length); Marshal.Copy(code, 0, funcPtr, code.Length); - ReprotectAsExecutable(funcOffset, code.Length); + ReprotectAsExecutable(targetRegion, funcOffset, code.Length); if (OperatingSystem.IsWindows() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) { @@ -116,52 +124,83 @@ namespace ARMeilleure.Translation.Cache { Debug.Assert(_initialized); - int funcOffset = (int)(pointer.ToInt64() - _jitRegion.Pointer.ToInt64()); - - if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset) + foreach (var region in _jitRegions) { - _cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size)); - _cacheEntries.RemoveAt(entryIndex); + if (pointer.ToInt64() < region.Pointer.ToInt64() || + pointer.ToInt64() >= (region.Pointer + CacheSize).ToInt64()) + { + continue; + } + + int funcOffset = (int)(pointer.ToInt64() - region.Pointer.ToInt64()); + + if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset) + { + _cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size)); + _cacheEntries.RemoveAt(entryIndex); + } + + return; } } } - private static void ReprotectAsWritable(int offset, int size) + private static void ReprotectAsWritable(ReservedRegion region, int offset, int size) { int endOffs = offset + size; - int regionStart = offset & ~_pageMask; int regionEnd = (endOffs + _pageMask) & ~_pageMask; - _jitRegion.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + region.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart)); } - private static void ReprotectAsExecutable(int offset, int size) + private static void ReprotectAsExecutable(ReservedRegion region, int offset, int size) { int endOffs = offset + size; - int regionStart = offset & ~_pageMask; int regionEnd = (endOffs + _pageMask) & ~_pageMask; - _jitRegion.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + region.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart)); } private static int Allocate(int codeSize) { codeSize = AlignCodeSize(codeSize); - int allocOffset = _cacheAllocator.Allocate(codeSize); - - if (allocOffset < 0) + for (int i = _activeRegionIndex; i < _jitRegions.Count; i++) { - throw new OutOfMemoryException("JIT Cache exhausted."); + int allocOffset = _cacheAllocator.Allocate(codeSize); + + if (allocOffset >= 0) + { + _jitRegions[i].ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize); + _activeRegionIndex = i; + return allocOffset; + } } - _jitRegion.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize); + int exhaustedRegion = _activeRegionIndex; + var newRegion = new ReservedRegion(_jitRegions[0].Allocator, CacheSize); + _jitRegions.Add(newRegion); + _activeRegionIndex = _jitRegions.Count - 1; + + int newRegionNumber = _activeRegionIndex; - return allocOffset; + Logger.Warning?.Print(LogClass.Cpu, $"JIT Cache Region {exhaustedRegion} exhausted, creating new Cache Region {newRegionNumber} ({(newRegionNumber + 1) * (CacheSize / 1024 / 1024)}MB Total Allocation)."); + + _cacheAllocator = new CacheMemoryAllocator(CacheSize); + + int allocOffsetNew = _cacheAllocator.Allocate(codeSize); + if (allocOffsetNew < 0) + { + throw new OutOfMemoryException("Failed to allocate in new Cache Region!"); + } + + newRegion.ExpandIfNeeded((ulong)allocOffsetNew + (ulong)codeSize); + return allocOffsetNew; } + private static int AlignCodeSize(int codeSize) { return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1); @@ -185,18 +224,21 @@ namespace ARMeilleure.Translation.Cache { lock (_lock) { - int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default)); - - if (index < 0) + foreach (var region in _jitRegions) { - index = ~index - 1; - } + int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default)); - if (index >= 0) - { - entry = _cacheEntries[index]; - entryIndex = index; - return true; + if (index < 0) + { + index = ~index - 1; + } + + if (index >= 0) + { + entry = _cacheEntries[index]; + entryIndex = index; + return true; + } } } diff --git a/src/Ryujinx.HLE/HOS/Services/Sockets/Bsd/Impl/WinSockHelper.cs b/src/Ryujinx.HLE/HOS/Services/Sockets/Bsd/Impl/WinSockHelper.cs index 3db2712f3..018bb8f14 100644 --- a/src/Ryujinx.HLE/HOS/Services/Sockets/Bsd/Impl/WinSockHelper.cs +++ b/src/Ryujinx.HLE/HOS/Services/Sockets/Bsd/Impl/WinSockHelper.cs @@ -150,6 +150,7 @@ namespace Ryujinx.HLE.HOS.Services.Sockets.Bsd.Impl { BsdSocketOption.SoLinger, SocketOptionName.Linger }, { BsdSocketOption.SoOobInline, SocketOptionName.OutOfBandInline }, { BsdSocketOption.SoReusePort, SocketOptionName.ReuseAddress }, + { BsdSocketOption.SoNoSigpipe, SocketOptionName.DontLinger }, { BsdSocketOption.SoSndBuf, SocketOptionName.SendBuffer }, { BsdSocketOption.SoRcvBuf, SocketOptionName.ReceiveBuffer }, { BsdSocketOption.SoSndLoWat, SocketOptionName.SendLowWater }, -- 2.47.1 From 0e8a41b19836595cce7db788145d6d13478322f4 Mon Sep 17 00:00:00 2001 From: VocalFan <45863583+FluffyOMC@users.noreply.github.com> Date: Mon, 3 Feb 2025 02:29:44 -0500 Subject: [PATCH 2/5] Humanizer boi --- src/ARMeilleure/Translation/Cache/JitCache.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ARMeilleure/Translation/Cache/JitCache.cs b/src/ARMeilleure/Translation/Cache/JitCache.cs index 8a40b6b63..0ede558b6 100644 --- a/src/ARMeilleure/Translation/Cache/JitCache.cs +++ b/src/ARMeilleure/Translation/Cache/JitCache.cs @@ -2,6 +2,7 @@ using ARMeilleure.CodeGen; using ARMeilleure.CodeGen.Unwinding; using ARMeilleure.Memory; using ARMeilleure.Native; +using Humanizer; using Ryujinx.Common.Logging; using Ryujinx.Memory; using System; @@ -21,7 +22,6 @@ namespace ARMeilleure.Translation.Cache private const int CodeAlignment = 4; // Bytes. private const int CacheSize = 256 * 1024 * 1024; - private static ReservedRegion _jitRegion; private static JitCacheInvalidation _jitCacheInvalidator; private static CacheMemoryAllocator _cacheAllocator; @@ -186,7 +186,7 @@ namespace ARMeilleure.Translation.Cache int newRegionNumber = _activeRegionIndex; - Logger.Warning?.Print(LogClass.Cpu, $"JIT Cache Region {exhaustedRegion} exhausted, creating new Cache Region {newRegionNumber} ({(newRegionNumber + 1) * (CacheSize / 1024 / 1024)}MB Total Allocation)."); + Logger.Warning?.Print(LogClass.Cpu, $"JIT Cache Region {exhaustedRegion} exhausted, creating new Cache Region {newRegionNumber} ({((newRegionNumber + 1) * CacheSize).Bytes()} Total Allocation)."); _cacheAllocator = new CacheMemoryAllocator(CacheSize); -- 2.47.1 From b2354768c48fd57ada06f4913f309339e5d3f2ac Mon Sep 17 00:00:00 2001 From: VocalFan <45863583+FluffyOMC@users.noreply.github.com> Date: Mon, 3 Feb 2025 03:17:22 -0500 Subject: [PATCH 3/5] Change Region size from 256MB to 128MB --- src/ARMeilleure/Translation/Cache/JitCache.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ARMeilleure/Translation/Cache/JitCache.cs b/src/ARMeilleure/Translation/Cache/JitCache.cs index 0ede558b6..dee325ef6 100644 --- a/src/ARMeilleure/Translation/Cache/JitCache.cs +++ b/src/ARMeilleure/Translation/Cache/JitCache.cs @@ -20,7 +20,7 @@ namespace ARMeilleure.Translation.Cache private static readonly int _pageMask = _pageSize - 1; private const int CodeAlignment = 4; // Bytes. - private const int CacheSize = 256 * 1024 * 1024; + private const int CacheSize = 128 * 1024 * 1024; private static JitCacheInvalidation _jitCacheInvalidator; -- 2.47.1 From 5c67efd291ef474bc62bbd35d168f81b4d923a44 Mon Sep 17 00:00:00 2001 From: VocalFan <45863583+FluffyOMC@users.noreply.github.com> Date: Mon, 3 Feb 2025 03:55:38 -0500 Subject: [PATCH 4/5] MacOS gets it too :3 --- .../LightningJit/Cache/JitCache.cs | 94 ++++++++++++------- 1 file changed, 61 insertions(+), 33 deletions(-) diff --git a/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs b/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs index 5849401ab..f64de9ba1 100644 --- a/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs +++ b/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs @@ -1,4 +1,6 @@ using ARMeilleure.Memory; +using Humanizer; +using Ryujinx.Common.Logging; using Ryujinx.Memory; using System; using System.Collections.Generic; @@ -15,9 +17,8 @@ namespace Ryujinx.Cpu.LightningJit.Cache private static readonly int _pageMask = _pageSize - 1; private const int CodeAlignment = 4; // Bytes. - private const int CacheSize = 2047 * 1024 * 1024; + private const int CacheSize = 128 * 1024 * 1024; - private static ReservedRegion _jitRegion; private static JitCacheInvalidation _jitCacheInvalidator; private static CacheMemoryAllocator _cacheAllocator; @@ -26,6 +27,8 @@ namespace Ryujinx.Cpu.LightningJit.Cache private static readonly Lock _lock = new(); private static bool _initialized; + private static readonly List _jitRegions = new(); + private static int _activeRegionIndex = 0; [SupportedOSPlatform("windows")] [LibraryImport("kernel32.dll", SetLastError = true)] @@ -45,7 +48,9 @@ namespace Ryujinx.Cpu.LightningJit.Cache return; } - _jitRegion = new ReservedRegion(allocator, CacheSize); + var firstRegion = new ReservedRegion(allocator, CacheSize); + _jitRegions.Add(firstRegion); + _activeRegionIndex = 0; if (!OperatingSystem.IsWindows() && !OperatingSystem.IsMacOS()) { @@ -65,8 +70,8 @@ namespace Ryujinx.Cpu.LightningJit.Cache Debug.Assert(_initialized); int funcOffset = Allocate(code.Length); - - nint funcPtr = _jitRegion.Pointer + funcOffset; + ReservedRegion targetRegion = _jitRegions[_activeRegionIndex]; + nint funcPtr = targetRegion.Pointer + funcOffset; if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) { @@ -80,18 +85,11 @@ namespace Ryujinx.Cpu.LightningJit.Cache } else { - ReprotectAsWritable(funcOffset, code.Length); - code.CopyTo(new Span((void*)funcPtr, code.Length)); - ReprotectAsExecutable(funcOffset, code.Length); + ReprotectAsWritable(targetRegion, funcOffset, code.Length); + Marshal.Copy(code.ToArray(), 0, funcPtr, code.Length); + ReprotectAsExecutable(targetRegion, funcOffset, code.Length); - if (OperatingSystem.IsWindows() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) - { - FlushInstructionCache(Process.GetCurrentProcess().Handle, funcPtr, (nuint)code.Length); - } - else - { - _jitCacheInvalidator?.Invalidate(funcPtr, (ulong)code.Length); - } + _jitCacheInvalidator?.Invalidate(funcPtr, (ulong)code.Length); } Add(funcOffset, code.Length); @@ -106,50 +104,80 @@ namespace Ryujinx.Cpu.LightningJit.Cache { Debug.Assert(_initialized); - int funcOffset = (int)(pointer.ToInt64() - _jitRegion.Pointer.ToInt64()); - - if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset) + foreach (var region in _jitRegions) { - _cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size)); - _cacheEntries.RemoveAt(entryIndex); + if (pointer.ToInt64() < region.Pointer.ToInt64() || + pointer.ToInt64() >= (region.Pointer + CacheSize).ToInt64()) + { + continue; + } + + int funcOffset = (int)(pointer.ToInt64() - region.Pointer.ToInt64()); + + if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset) + { + _cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size)); + _cacheEntries.RemoveAt(entryIndex); + } + + return; } } } - private static void ReprotectAsWritable(int offset, int size) + private static void ReprotectAsWritable(ReservedRegion region, int offset, int size) { int endOffs = offset + size; - int regionStart = offset & ~_pageMask; int regionEnd = (endOffs + _pageMask) & ~_pageMask; - _jitRegion.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + region.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart)); } - private static void ReprotectAsExecutable(int offset, int size) + private static void ReprotectAsExecutable(ReservedRegion region, int offset, int size) { int endOffs = offset + size; - int regionStart = offset & ~_pageMask; int regionEnd = (endOffs + _pageMask) & ~_pageMask; - _jitRegion.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + region.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart)); } private static int Allocate(int codeSize) { codeSize = AlignCodeSize(codeSize); - int allocOffset = _cacheAllocator.Allocate(codeSize); - - if (allocOffset < 0) + for (int i = _activeRegionIndex; i < _jitRegions.Count; i++) { - throw new OutOfMemoryException("JIT Cache exhausted."); + int allocOffset = _cacheAllocator.Allocate(codeSize); + + if (allocOffset >= 0) + { + _jitRegions[i].ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize); + _activeRegionIndex = i; + return allocOffset; + } } - _jitRegion.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize); + int exhaustedRegion = _activeRegionIndex; + var newRegion = new ReservedRegion(_jitRegions[0].Allocator, CacheSize); + _jitRegions.Add(newRegion); + _activeRegionIndex = _jitRegions.Count - 1; + + int newRegionNumber = _activeRegionIndex; - return allocOffset; + Logger.Warning?.Print(LogClass.Cpu, $"JIT Cache Region {exhaustedRegion} exhausted, creating new Cache Region {newRegionNumber} ({((newRegionNumber + 1) * CacheSize).Bytes()} Total Allocation)."); + + _cacheAllocator = new CacheMemoryAllocator(CacheSize); + + int allocOffsetNew = _cacheAllocator.Allocate(codeSize); + if (allocOffsetNew < 0) + { + throw new OutOfMemoryException("Failed to allocate in new Cache Region!"); + } + + newRegion.ExpandIfNeeded((ulong)allocOffsetNew + (ulong)codeSize); + return allocOffsetNew; } private static int AlignCodeSize(int codeSize) -- 2.47.1 From 9886d49204a6782572d6e11140d80978206760be Mon Sep 17 00:00:00 2001 From: VocalFan <45863583+FluffyOMC@users.noreply.github.com> Date: Fri, 7 Feb 2025 19:46:49 -0500 Subject: [PATCH 5/5] Change from 128mb to 256mb region size --- src/ARMeilleure/Translation/Cache/JitCache.cs | 2 +- src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs | 2 +- src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ARMeilleure/Translation/Cache/JitCache.cs b/src/ARMeilleure/Translation/Cache/JitCache.cs index dee325ef6..0ede558b6 100644 --- a/src/ARMeilleure/Translation/Cache/JitCache.cs +++ b/src/ARMeilleure/Translation/Cache/JitCache.cs @@ -20,7 +20,7 @@ namespace ARMeilleure.Translation.Cache private static readonly int _pageMask = _pageSize - 1; private const int CodeAlignment = 4; // Bytes. - private const int CacheSize = 128 * 1024 * 1024; + private const int CacheSize = 256 * 1024 * 1024; private static JitCacheInvalidation _jitCacheInvalidator; diff --git a/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs b/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs index f64de9ba1..c994d424e 100644 --- a/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs +++ b/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs @@ -17,7 +17,7 @@ namespace Ryujinx.Cpu.LightningJit.Cache private static readonly int _pageMask = _pageSize - 1; private const int CodeAlignment = 4; // Bytes. - private const int CacheSize = 128 * 1024 * 1024; + private const int CacheSize = 256 * 1024 * 1024; private static JitCacheInvalidation _jitCacheInvalidator; diff --git a/src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs b/src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs index 1bbf70182..65d297c28 100644 --- a/src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs +++ b/src/Ryujinx.Cpu/LightningJit/Cache/NoWxCache.cs @@ -12,7 +12,7 @@ namespace Ryujinx.Cpu.LightningJit.Cache { private const int CodeAlignment = 4; // Bytes. private const int SharedCacheSize = 2047 * 1024 * 1024; - private const int LocalCacheSize = 128 * 1024 * 1024; + private const int LocalCacheSize = 256 * 1024 * 1024; // How many calls to the same function we allow until we pad the shared cache to force the function to become available there // and allow the guest to take the fast path. -- 2.47.1