LotP1 e653848a2c
JIT Sparse Function Table (#250)
More up to date build of the JIT Sparse PR for continued development.
JIT Sparse Function Table was originally developed by riperiperi for the
original Ryujinx project, and decreased the amount of layers in the
Function Table structure, to decrease lookup times at the cost of
slightly higher RAM usage.
This PR rebalances the JIT Sparse Function Table to be a bit more RAM
intensive, but faster in workloads where the JIT Function Table is a
bottleneck. Faster RAM will see a bigger impact and slower RAM (DDR3 and
potentially slow DDR4) will see a slight performance decrease.
This PR also implements a base for a PPTC profile system that could
allow for PPTC with ExeFS mods enabled in the future.
This PR also potentially fixes a strange issue where Avalonia would time
out in some rare instances, e.g. when running ExeFS mods with TotK and a
strange controller configuration.

---------

Co-authored-by: Evan Husted <gr33m11@gmail.com>
2024-11-22 15:33:44 -06:00

209 lines
6.5 KiB
C#

using ARMeilleure.Common;
using ARMeilleure.Memory;
using Ryujinx.Cpu.Jit;
using Ryujinx.Cpu.LightningJit.Cache;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using Ryujinx.Cpu.LightningJit.State;
using Ryujinx.Cpu.Signal;
using Ryujinx.Memory;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Threading;
namespace Ryujinx.Cpu.LightningJit
{
class Translator : IDisposable
{
// Should be enabled on platforms that enforce W^X.
private static bool IsNoWxPlatform => false;
private readonly ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>> _oldFuncs;
private readonly NoWxCache _noWxCache;
private bool _disposed;
internal TranslatorCache<TranslatedFunction> Functions { get; }
internal AddressTable<ulong> FunctionTable { get; }
internal TranslatorStubs Stubs { get; }
internal IMemoryManager Memory { get; }
public Translator(IMemoryManager memory, AddressTable<ulong> functionTable)
{
Memory = memory;
_oldFuncs = new ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>>();
if (IsNoWxPlatform)
{
_noWxCache = new(new JitMemoryAllocator(), CreateStackWalker(), this);
}
else
{
JitCache.Initialize(new JitMemoryAllocator(forJit: true));
}
Functions = new TranslatorCache<TranslatedFunction>();
FunctionTable = functionTable;
Stubs = new TranslatorStubs(FunctionTable, _noWxCache);
FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub;
if (memory.Type.IsHostMappedOrTracked())
{
NativeSignalHandler.InitializeSignalHandler();
}
}
private static IStackWalker CreateStackWalker()
{
if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
{
return new StackWalker();
}
else
{
throw new PlatformNotSupportedException();
}
}
public void Execute(State.ExecutionContext context, ulong address)
{
ObjectDisposedException.ThrowIf(_disposed, this);
NativeInterface.RegisterThread(context, Memory, this);
Stubs.DispatchLoop(context.NativeContextPtr, address);
NativeInterface.UnregisterThread();
_noWxCache?.ClearEntireThreadLocalCache();
}
internal nint GetOrTranslatePointer(nint framePointer, ulong address, ExecutionMode mode)
{
if (_noWxCache != null)
{
CompiledFunction func = Compile(address, mode);
return _noWxCache.Map(framePointer, func.Code, address, (ulong)func.GuestCodeLength);
}
return GetOrTranslate(address, mode).FuncPointer;
}
private TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
{
if (!Functions.TryGetValue(address, out TranslatedFunction func))
{
func = Translate(address, mode);
TranslatedFunction oldFunc = Functions.GetOrAdd(address, func.GuestSize, func);
if (oldFunc != func)
{
JitCache.Unmap(func.FuncPointer);
func = oldFunc;
}
RegisterFunction(address, func);
}
return func;
}
internal void RegisterFunction(ulong guestAddress, TranslatedFunction func)
{
if (FunctionTable.IsValid(guestAddress))
{
Volatile.Write(ref FunctionTable.GetValue(guestAddress), (ulong)func.FuncPointer);
}
}
private TranslatedFunction Translate(ulong address, ExecutionMode mode)
{
CompiledFunction func = Compile(address, mode);
nint funcPointer = JitCache.Map(func.Code);
return new TranslatedFunction(funcPointer, (ulong)func.GuestCodeLength);
}
private CompiledFunction Compile(ulong address, ExecutionMode mode)
{
return AarchCompiler.Compile(CpuPresets.CortexA57, Memory, address, FunctionTable, Stubs.DispatchStub, mode, RuntimeInformation.ProcessArchitecture);
}
public void InvalidateJitCacheRegion(ulong address, ulong size)
{
ulong[] overlapAddresses = Array.Empty<ulong>();
int overlapsCount = Functions.GetOverlaps(address, size, ref overlapAddresses);
for (int index = 0; index < overlapsCount; index++)
{
ulong overlapAddress = overlapAddresses[index];
if (Functions.TryGetValue(overlapAddress, out TranslatedFunction overlap))
{
Functions.Remove(overlapAddress);
Volatile.Write(ref FunctionTable.GetValue(overlapAddress), FunctionTable.Fill);
EnqueueForDeletion(overlapAddress, overlap);
}
}
// TODO: Remove overlapping functions from the JitCache aswell.
// This should be done safely, with a mechanism to ensure the function is not being executed.
}
private void EnqueueForDeletion(ulong guestAddress, TranslatedFunction func)
{
_oldFuncs.Enqueue(new(guestAddress, func));
}
private void ClearJitCache()
{
List<TranslatedFunction> functions = Functions.AsList();
foreach (var func in functions)
{
JitCache.Unmap(func.FuncPointer);
}
Functions.Clear();
while (_oldFuncs.TryDequeue(out var kv))
{
JitCache.Unmap(kv.Value.FuncPointer);
}
}
protected virtual void Dispose(bool disposing)
{
if (!_disposed)
{
if (disposing)
{
if (_noWxCache != null)
{
_noWxCache.Dispose();
}
else
{
ClearJitCache();
}
Stubs.Dispose();
FunctionTable.Dispose();
}
_disposed = true;
}
}
public void Dispose()
{
Dispose(disposing: true);
GC.SuppressFinalize(this);
}
}
}