Add Jit Cache Regions! #615

Merged
FluffyOMC merged 21 commits from master into master 2025-02-10 21:53:44 +00:00
5 changed files with 142 additions and 69 deletions

View File

@ -7,6 +7,7 @@ namespace ARMeilleure.Memory
public const int DefaultGranularity = 65536; // Mapping granularity in Windows.
public IJitMemoryBlock Block { get; }
public IJitMemoryAllocator Allocator { get; }
public nint Pointer => Block.Pointer;
@ -21,6 +22,7 @@ namespace ARMeilleure.Memory
granularity = DefaultGranularity;
}
Allocator = allocator;
Block = allocator.Reserve(maxSize);
_maxSize = maxSize;
_sizeGranularity = granularity;

View File

@ -2,6 +2,8 @@ using ARMeilleure.CodeGen;
using ARMeilleure.CodeGen.Unwinding;
using ARMeilleure.Memory;
using ARMeilleure.Native;
using Humanizer;
using Ryujinx.Common.Logging;
using Ryujinx.Memory;
using System;
using System.Collections.Generic;
@ -18,9 +20,8 @@ namespace ARMeilleure.Translation.Cache
private static readonly int _pageMask = _pageSize - 1;
private const int CodeAlignment = 4; // Bytes.
private const int CacheSize = 2047 * 1024 * 1024;
private const int CacheSize = 256 * 1024 * 1024;
private static ReservedRegion _jitRegion;
private static JitCacheInvalidation _jitCacheInvalidator;
private static CacheMemoryAllocator _cacheAllocator;
@ -30,6 +31,9 @@ namespace ARMeilleure.Translation.Cache
private static readonly Lock _lock = new();
private static bool _initialized;
private static readonly List<ReservedRegion> _jitRegions = new();
private static int _activeRegionIndex = 0;
[SupportedOSPlatform("windows")]
[LibraryImport("kernel32.dll", SetLastError = true)]
public static partial nint FlushInstructionCache(nint hProcess, nint lpAddress, nuint dwSize);
@ -48,7 +52,9 @@ namespace ARMeilleure.Translation.Cache
return;
}
_jitRegion = new ReservedRegion(allocator, CacheSize);
var firstRegion = new ReservedRegion(allocator, CacheSize);
_jitRegions.Add(firstRegion);
_activeRegionIndex = 0;
if (!OperatingSystem.IsWindows() && !OperatingSystem.IsMacOS())
{
@ -59,7 +65,9 @@ namespace ARMeilleure.Translation.Cache
if (OperatingSystem.IsWindows())
{
JitUnwindWindows.InstallFunctionTableHandler(_jitRegion.Pointer, CacheSize, _jitRegion.Pointer + Allocate(_pageSize));
JitUnwindWindows.InstallFunctionTableHandler(
firstRegion.Pointer, CacheSize, firstRegion.Pointer + Allocate(_pageSize)
);
}
_initialized = true;
@ -75,8 +83,8 @@ namespace ARMeilleure.Translation.Cache
Debug.Assert(_initialized);
int funcOffset = Allocate(code.Length);
nint funcPtr = _jitRegion.Pointer + funcOffset;
ReservedRegion targetRegion = _jitRegions[_activeRegionIndex];
nint funcPtr = targetRegion.Pointer + funcOffset;
if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
{
@ -90,9 +98,9 @@ namespace ARMeilleure.Translation.Cache
}
else
{
ReprotectAsWritable(funcOffset, code.Length);
ReprotectAsWritable(targetRegion, funcOffset, code.Length);
Marshal.Copy(code, 0, funcPtr, code.Length);
ReprotectAsExecutable(funcOffset, code.Length);
ReprotectAsExecutable(targetRegion, funcOffset, code.Length);
if (OperatingSystem.IsWindows() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
{
@ -116,52 +124,83 @@ namespace ARMeilleure.Translation.Cache
{
Debug.Assert(_initialized);
int funcOffset = (int)(pointer.ToInt64() - _jitRegion.Pointer.ToInt64());
if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset)
foreach (var region in _jitRegions)
{
_cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size));
_cacheEntries.RemoveAt(entryIndex);
if (pointer.ToInt64() < region.Pointer.ToInt64() ||
pointer.ToInt64() >= (region.Pointer + CacheSize).ToInt64())
{
continue;
}
int funcOffset = (int)(pointer.ToInt64() - region.Pointer.ToInt64());
if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset)
{
_cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size));
_cacheEntries.RemoveAt(entryIndex);
}
return;
}
}
}
private static void ReprotectAsWritable(int offset, int size)
private static void ReprotectAsWritable(ReservedRegion region, int offset, int size)
{
int endOffs = offset + size;
int regionStart = offset & ~_pageMask;
int regionEnd = (endOffs + _pageMask) & ~_pageMask;
_jitRegion.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart));
region.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart));
}
private static void ReprotectAsExecutable(int offset, int size)
private static void ReprotectAsExecutable(ReservedRegion region, int offset, int size)
{
int endOffs = offset + size;
int regionStart = offset & ~_pageMask;
int regionEnd = (endOffs + _pageMask) & ~_pageMask;
_jitRegion.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart));
region.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart));
}
private static int Allocate(int codeSize)
{
codeSize = AlignCodeSize(codeSize);
int allocOffset = _cacheAllocator.Allocate(codeSize);
if (allocOffset < 0)
for (int i = _activeRegionIndex; i < _jitRegions.Count; i++)
{
throw new OutOfMemoryException("JIT Cache exhausted.");
int allocOffset = _cacheAllocator.Allocate(codeSize);
if (allocOffset >= 0)
{
_jitRegions[i].ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize);
_activeRegionIndex = i;
return allocOffset;
}
}
_jitRegion.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize);
int exhaustedRegion = _activeRegionIndex;
var newRegion = new ReservedRegion(_jitRegions[0].Allocator, CacheSize);
_jitRegions.Add(newRegion);
_activeRegionIndex = _jitRegions.Count - 1;
int newRegionNumber = _activeRegionIndex;
return allocOffset;
Logger.Warning?.Print(LogClass.Cpu, $"JIT Cache Region {exhaustedRegion} exhausted, creating new Cache Region {newRegionNumber} ({((newRegionNumber + 1) * CacheSize).Bytes()} Total Allocation).");
_cacheAllocator = new CacheMemoryAllocator(CacheSize);
int allocOffsetNew = _cacheAllocator.Allocate(codeSize);
if (allocOffsetNew < 0)
{
throw new OutOfMemoryException("Failed to allocate in new Cache Region!");
}
newRegion.ExpandIfNeeded((ulong)allocOffsetNew + (ulong)codeSize);
return allocOffsetNew;
}
private static int AlignCodeSize(int codeSize)
{
return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1);
@ -185,18 +224,21 @@ namespace ARMeilleure.Translation.Cache
{
lock (_lock)
{
int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default));
if (index < 0)
foreach (var region in _jitRegions)
{
index = ~index - 1;
}
int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default));
if (index >= 0)
{
entry = _cacheEntries[index];
entryIndex = index;
return true;
if (index < 0)
{
index = ~index - 1;
}
if (index >= 0)
{
entry = _cacheEntries[index];
entryIndex = index;
return true;
}
}
}

View File

@ -1,4 +1,6 @@
using ARMeilleure.Memory;
using Humanizer;
using Ryujinx.Common.Logging;
using Ryujinx.Memory;
using System;
using System.Collections.Generic;
@ -15,9 +17,8 @@ namespace Ryujinx.Cpu.LightningJit.Cache
private static readonly int _pageMask = _pageSize - 1;
private const int CodeAlignment = 4; // Bytes.
private const int CacheSize = 2047 * 1024 * 1024;
private const int CacheSize = 256 * 1024 * 1024;
private static ReservedRegion _jitRegion;
private static JitCacheInvalidation _jitCacheInvalidator;
private static CacheMemoryAllocator _cacheAllocator;
@ -26,6 +27,8 @@ namespace Ryujinx.Cpu.LightningJit.Cache
private static readonly Lock _lock = new();
private static bool _initialized;
private static readonly List<ReservedRegion> _jitRegions = new();
private static int _activeRegionIndex = 0;
[SupportedOSPlatform("windows")]
[LibraryImport("kernel32.dll", SetLastError = true)]
@ -45,7 +48,9 @@ namespace Ryujinx.Cpu.LightningJit.Cache
return;
}
_jitRegion = new ReservedRegion(allocator, CacheSize);
var firstRegion = new ReservedRegion(allocator, CacheSize);
_jitRegions.Add(firstRegion);
_activeRegionIndex = 0;
if (!OperatingSystem.IsWindows() && !OperatingSystem.IsMacOS())
{
@ -65,8 +70,8 @@ namespace Ryujinx.Cpu.LightningJit.Cache
Debug.Assert(_initialized);
int funcOffset = Allocate(code.Length);
nint funcPtr = _jitRegion.Pointer + funcOffset;
ReservedRegion targetRegion = _jitRegions[_activeRegionIndex];
nint funcPtr = targetRegion.Pointer + funcOffset;
if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
{
@ -80,18 +85,11 @@ namespace Ryujinx.Cpu.LightningJit.Cache
}
else
{
ReprotectAsWritable(funcOffset, code.Length);
code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));
ReprotectAsExecutable(funcOffset, code.Length);
ReprotectAsWritable(targetRegion, funcOffset, code.Length);
Marshal.Copy(code.ToArray(), 0, funcPtr, code.Length);
ReprotectAsExecutable(targetRegion, funcOffset, code.Length);
if (OperatingSystem.IsWindows() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
{
FlushInstructionCache(Process.GetCurrentProcess().Handle, funcPtr, (nuint)code.Length);
}
else
{
_jitCacheInvalidator?.Invalidate(funcPtr, (ulong)code.Length);
}
_jitCacheInvalidator?.Invalidate(funcPtr, (ulong)code.Length);
}
Add(funcOffset, code.Length);
@ -106,50 +104,80 @@ namespace Ryujinx.Cpu.LightningJit.Cache
{
Debug.Assert(_initialized);
int funcOffset = (int)(pointer.ToInt64() - _jitRegion.Pointer.ToInt64());
if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset)
foreach (var region in _jitRegions)
{
_cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size));
_cacheEntries.RemoveAt(entryIndex);
if (pointer.ToInt64() < region.Pointer.ToInt64() ||
pointer.ToInt64() >= (region.Pointer + CacheSize).ToInt64())
{
continue;
}
int funcOffset = (int)(pointer.ToInt64() - region.Pointer.ToInt64());
if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset)
{
_cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size));
_cacheEntries.RemoveAt(entryIndex);
}
return;
}
}
}
private static void ReprotectAsWritable(int offset, int size)
private static void ReprotectAsWritable(ReservedRegion region, int offset, int size)
{
int endOffs = offset + size;
int regionStart = offset & ~_pageMask;
int regionEnd = (endOffs + _pageMask) & ~_pageMask;
_jitRegion.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart));
region.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart));
}
private static void ReprotectAsExecutable(int offset, int size)
private static void ReprotectAsExecutable(ReservedRegion region, int offset, int size)
{
int endOffs = offset + size;
int regionStart = offset & ~_pageMask;
int regionEnd = (endOffs + _pageMask) & ~_pageMask;
_jitRegion.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart));
region.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart));
}
private static int Allocate(int codeSize)
{
codeSize = AlignCodeSize(codeSize);
int allocOffset = _cacheAllocator.Allocate(codeSize);
if (allocOffset < 0)
for (int i = _activeRegionIndex; i < _jitRegions.Count; i++)
{
throw new OutOfMemoryException("JIT Cache exhausted.");
int allocOffset = _cacheAllocator.Allocate(codeSize);
if (allocOffset >= 0)
{
_jitRegions[i].ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize);
_activeRegionIndex = i;
return allocOffset;
}
}
_jitRegion.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize);
int exhaustedRegion = _activeRegionIndex;
var newRegion = new ReservedRegion(_jitRegions[0].Allocator, CacheSize);
_jitRegions.Add(newRegion);
_activeRegionIndex = _jitRegions.Count - 1;
int newRegionNumber = _activeRegionIndex;
return allocOffset;
Logger.Warning?.Print(LogClass.Cpu, $"JIT Cache Region {exhaustedRegion} exhausted, creating new Cache Region {newRegionNumber} ({((newRegionNumber + 1) * CacheSize).Bytes()} Total Allocation).");
_cacheAllocator = new CacheMemoryAllocator(CacheSize);
int allocOffsetNew = _cacheAllocator.Allocate(codeSize);
if (allocOffsetNew < 0)
{
throw new OutOfMemoryException("Failed to allocate in new Cache Region!");
}
newRegion.ExpandIfNeeded((ulong)allocOffsetNew + (ulong)codeSize);
return allocOffsetNew;
}
private static int AlignCodeSize(int codeSize)

View File

@ -12,7 +12,7 @@ namespace Ryujinx.Cpu.LightningJit.Cache
{
private const int CodeAlignment = 4; // Bytes.
private const int SharedCacheSize = 2047 * 1024 * 1024;
private const int LocalCacheSize = 128 * 1024 * 1024;
private const int LocalCacheSize = 256 * 1024 * 1024;
// How many calls to the same function we allow until we pad the shared cache to force the function to become available there
// and allow the guest to take the fast path.

View File

@ -150,6 +150,7 @@ namespace Ryujinx.HLE.HOS.Services.Sockets.Bsd.Impl
{ BsdSocketOption.SoLinger, SocketOptionName.Linger },
{ BsdSocketOption.SoOobInline, SocketOptionName.OutOfBandInline },
{ BsdSocketOption.SoReusePort, SocketOptionName.ReuseAddress },
{ BsdSocketOption.SoNoSigpipe, SocketOptionName.DontLinger },
{ BsdSocketOption.SoSndBuf, SocketOptionName.SendBuffer },
{ BsdSocketOption.SoRcvBuf, SocketOptionName.ReceiveBuffer },
{ BsdSocketOption.SoSndLoWat, SocketOptionName.SendLowWater },