diff --git a/src/Ryujinx.Cpu/Nce/Arm64/ArmCondition.cs b/src/Ryujinx.Cpu/Nce/Arm64/ArmCondition.cs
new file mode 100644
index 000000000..125fe2a10
--- /dev/null
+++ b/src/Ryujinx.Cpu/Nce/Arm64/ArmCondition.cs
@@ -0,0 +1,22 @@
+namespace Ryujinx.Cpu.Nce.Arm64
+{
+    /// <summary>
+    /// A64 condition codes. The numeric value of each member is the 4-bit condition
+    /// field encoding, which the assembler inserts directly into instructions.
+    /// </summary>
+    enum ArmCondition
+    {
+        Eq = 0, // Equal.
+        Ne = 1, // Not equal.
+        GeUn = 2, // Unsigned greater than or equal (architectural name CS/HS).
+        LtUn = 3, // Unsigned less than (architectural name CC/LO).
+        Mi = 4, // Minus, negative.
+        Pl = 5, // Plus, positive or zero.
+        Vs = 6, // Overflow set.
+        Vc = 7, // Overflow clear.
+        GtUn = 8, // Unsigned greater than (architectural name HI).
+        LeUn = 9, // Unsigned less than or equal (architectural name LS).
+        Ge = 10, // Signed greater than or equal.
+        Lt = 11, // Signed less than.
+        Gt = 12, // Signed greater than.
+        Le = 13, // Signed less than or equal.
+        Al = 14, // Always.
+        Nv = 15, // Second "always" encoding; also the result of inverting Al (bit 0 flipped).
+    }
+}
diff --git a/src/Ryujinx.Cpu/Nce/Arm64/ArmExtensionType.cs b/src/Ryujinx.Cpu/Nce/Arm64/ArmExtensionType.cs
new file mode 100644
index 000000000..48416da46
--- /dev/null
+++ b/src/Ryujinx.Cpu/Nce/Arm64/ArmExtensionType.cs
@@ -0,0 +1,14 @@
+namespace Ryujinx.Cpu.Nce.Arm64
+{
+    /// <summary>
+    /// Register extension kinds. The numeric value is the 3-bit "option" field that the
+    /// assembler shifts into bits 13-15 of extended-register and register-offset encodings.
+    /// </summary>
+    enum ArmExtensionType
+    {
+        Uxtb = 0, // Unsigned extend byte.
+        Uxth = 1, // Unsigned extend halfword.
+        Uxtw = 2, // Unsigned extend word.
+        Uxtx = 3, // Unsigned extend doubleword.
+        Sxtb = 4, // Signed extend byte.
+        Sxth = 5, // Signed extend halfword.
+        Sxtw = 6, // Signed extend word.
+        Sxtx = 7, // Signed extend doubleword.
+    }
+}
diff --git a/src/Ryujinx.Cpu/Nce/Arm64/ArmShiftType.cs b/src/Ryujinx.Cpu/Nce/Arm64/ArmShiftType.cs
new file mode 100644
index 000000000..290e141b9
--- /dev/null
+++ b/src/Ryujinx.Cpu/Nce/Arm64/ArmShiftType.cs
@@ -0,0 +1,11 @@
+
+namespace Ryujinx.Cpu.Nce.Arm64
+{
+    /// <summary>
+    /// Shift kinds for shifted-register operands. The numeric value is the 2-bit shift
+    /// field that the assembler places at bit 22 of the instruction.
+    /// </summary>
+    enum ArmShiftType
+    {
+        Lsl = 0, // Logical shift left.
+        Lsr = 1, // Logical shift right.
+        Asr = 2, // Arithmetic shift right.
+        Ror = 3, // Rotate right.
+    }
+}
diff --git a/src/Ryujinx.Cpu/Nce/Arm64/Assembler.cs b/src/Ryujinx.Cpu/Nce/Arm64/Assembler.cs
new file mode 100644
index 000000000..8db1575f1
--- /dev/null
+++ b/src/Ryujinx.Cpu/Nce/Arm64/Assembler.cs
@@ -0,0 +1,1103 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+
+namespace Ryujinx.Cpu.Nce.Arm64
+{
+    class Assembler
+    {
+        // Bit 31 of the instruction word; the *Auto helpers set it when the destination operand is I64.
+        private const uint SfFlag = 1u << 31;
+
+        // The stack pointer and the zero register share index 31; which one is meant depends on the instruction.
+        public const int SpRegister = 31;
+        public const int ZrRegister = 31;
+
+        // Emitted 32-bit instruction words, in emission order.
+        private readonly List<uint> _code;
+
+        // Number of instruction words emitted so far.
+        public int CodeWords => _code.Count;
+
+        /// <summary>
+        /// Book-keeping for one label: where it was bound and which branches still need patching.
+        /// </summary>
+        private class LabelState
+        {
+            // Index of the most recently emitted branch that references this label.
+            public int BranchIndex;
+            // Index of the instruction word the label was bound to by MarkLabel.
+            public int TargetIndex;
+            public bool HasBranch;
+            public bool HasTarget;
+            // Every branch emitted before the label was bound. A single BranchIndex is not enough:
+            // with several forward branches to the same label, all but the last would keep offset 0.
+            public readonly List<int> PendingBranches = new List<int>();
+        }
+
+        private List<LabelState> _labels;
+
+        public Assembler()
+        {
+            _code = new List<uint>();
+            _labels = new List<LabelState>();
+        }
+
+        /// <summary>
+        /// Creates a new, unbound label.
+        /// </summary>
+        /// <returns>A label operand whose value is the index of the label inside this assembler</returns>
+        public Operand CreateLabel()
+        {
+            int labelIndex = _labels.Count;
+            _labels.Add(new LabelState());
+
+            return new Operand(OperandKind.Label, OperandType.None, (ulong)labelIndex);
+        }
+
+        /// <summary>
+        /// Binds a label to the current end of the code and patches every branch
+        /// that was emitted towards it before it was bound.
+        /// </summary>
+        /// <param name="label">Label previously returned by <see cref="CreateLabel"/></param>
+        public void MarkLabel(Operand label)
+        {
+            int targetIndex = _code.Count;
+
+            var state = _labels[label.AsInt32()];
+
+            state.TargetIndex = targetIndex;
+            state.HasTarget = true;
+
+            foreach (int branchIndex in state.PendingBranches)
+            {
+                int imm = (targetIndex - branchIndex) * sizeof(uint);
+                uint code = _code[branchIndex];
+
+                // Pending branches were emitted with a zero offset, so the real offset can simply be OR-ed in.
+                if ((code & 0xfc000000u) == 0x14000000u)
+                {
+                    // Unconditional B: 26-bit offset in the low bits.
+                    _code[branchIndex] = code | EncodeSImm26_2(imm);
+                }
+                else
+                {
+                    // B.cond, CBZ and CBNZ: 19-bit offset starting at bit 5.
+                    _code[branchIndex] = code | (EncodeSImm19_2(imm) << 5);
+                }
+            }
+
+            // Prevents patching the same words twice if the label is ever marked again.
+            state.PendingBranches.Clear();
+        }
+
+        /// <summary>
+        /// Records that a branch to <paramref name="label"/> is about to be emitted at the current position.
+        /// </summary>
+        /// <param name="label">Target label</param>
+        /// <returns>Byte offset to the label if it is already bound, otherwise 0 (patched later by MarkLabel)</returns>
+        private int RegisterLabelBranch(Operand label)
+        {
+            int branchIndex = _code.Count;
+
+            var state = _labels[label.AsInt32()];
+
+            state.BranchIndex = branchIndex;
+            state.HasBranch = true;
+
+            if (state.HasTarget)
+            {
+                return (state.TargetIndex - branchIndex) * sizeof(uint);
+            }
+
+            state.PendingBranches.Add(branchIndex);
+
+            return 0;
+        }
+
+        /// <summary>
+        /// Emits a branch to a label: B when the condition is Al, B.cond otherwise.
+        /// </summary>
+        /// <param name="label">Target label, bound or not</param>
+        /// <param name="condition">Condition under which the branch is taken</param>
+        public void B(Operand label, ArmCondition condition = ArmCondition.Al)
+        {
+            int imm = RegisterLabelBranch(label);
+
+            if (condition == ArmCondition.Al)
+            {
+                B(imm);
+            }
+            else
+            {
+                B(condition, imm);
+            }
+        }
+
+        /// <summary>
+        /// Emits CBZ (compare and branch if zero) towards a label.
+        /// </summary>
+        /// <param name="rt">Register that is tested</param>
+        /// <param name="label">Target label, bound or not</param>
+        public void Cbz(Operand rt, Operand label)
+        {
+            Cbz(rt, RegisterLabelBranch(label));
+        }
+
+        /// <summary>
+        /// Emits CBNZ (compare and branch if not zero) towards a label.
+        /// </summary>
+        /// <param name="rt">Register that is tested</param>
+        /// <param name="label">Target label, bound or not</param>
+        public void Cbnz(Operand rt, Operand label)
+        {
+            Cbnz(rt, RegisterLabelBranch(label));
+        }
+
+        /// <summary>Emits ADD (extended register).</summary>
+        public void Add(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0)
+        {
+            const uint AddExtended = 0x0b200000u;
+
+            WriteInstructionAuto(AddExtended, rd, rn, rm, extensionType, shiftAmount);
+        }
+
+        /// <summary>
+        /// Emits ADD, picking the immediate encoding when <paramref name="rm"/> is a constant that is
+        /// non-zero (or when <paramref name="immForm"/> is set), and the shifted register encoding otherwise.
+        /// </summary>
+        public void Add(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0, bool immForm = false)
+        {
+            const uint AddImmediate = 0x11000000u;
+            const uint AddShiftedRegister = 0x0b000000u;
+
+            WriteInstructionAuto(AddImmediate, AddShiftedRegister, rd, rn, rm, shiftType, shiftAmount, immForm);
+        }
+
+        /// <summary>Emits AND, using the bitmask immediate form for non-zero constants and the shifted register form otherwise.</summary>
+        public void And(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            const uint AndImmediate = 0x12000000u;
+            const uint AndShiftedRegister = 0x0a000000u;
+
+            WriteInstructionBitwiseAuto(AndImmediate, AndShiftedRegister, rd, rn, rm, shiftType, shiftAmount);
+        }
+
+        /// <summary>Emits ANDS (flag-setting AND), using the bitmask immediate form for non-zero constants and the shifted register form otherwise.</summary>
+        public void Ands(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            const uint AndsImmediate = 0x72000000u;
+            const uint AndsShiftedRegister = 0x6a000000u;
+
+            WriteInstructionBitwiseAuto(AndsImmediate, AndsShiftedRegister, rd, rn, rm, shiftType, shiftAmount);
+        }
+
+        /// <summary>
+        /// Emits an arithmetic shift right. A constant amount is encoded as SBFM,
+        /// a register amount as ASRV.
+        /// </summary>
+        public void Asr(Operand rd, Operand rn, Operand rm)
+        {
+            if (rm.Kind != OperandKind.Constant)
+            {
+                Asrv(rd, rn, rm);
+
+                return;
+            }
+
+            int amount = rm.AsInt32();
+            int lastBit = rd.Type == OperandType.I64 ? 63 : 31;
+
+            // The amount wraps at the register width.
+            Sbfm(rd, rn, amount & lastBit, lastBit);
+        }
+
+        /// <summary>Emits ASRV (arithmetic shift right by register).</summary>
+        public void Asrv(Operand rd, Operand rn, Operand rm) => WriteInstructionBitwiseAuto(0x1ac02800u, rd, rn, rm);
+
+        /// <summary>Emits an unconditional B with the given byte offset.</summary>
+        public void B(int imm)
+        {
+            uint offsetField = EncodeSImm26_2(imm);
+
+            WriteUInt32(0x14000000u | offsetField);
+        }
+
+        /// <summary>Emits B.cond with the given condition and byte offset.</summary>
+        public void B(ArmCondition condition, int imm)
+        {
+            uint conditionField = (uint)condition;
+            uint offsetField = EncodeSImm19_2(imm) << 5;
+
+            WriteUInt32(0x54000000u | conditionField | offsetField);
+        }
+
+        /// <summary>Emits BLR (branch with link to the address in <paramref name="rn"/>).</summary>
+        public void Blr(Operand rn)
+        {
+            uint target = EncodeReg(rn) << 5;
+
+            WriteUInt32(0xd63f0000u | target);
+        }
+
+        /// <summary>Emits BR (branch to the address in <paramref name="rn"/>).</summary>
+        public void Br(Operand rn)
+        {
+            uint target = EncodeReg(rn) << 5;
+
+            WriteUInt32(0xd61f0000u | target);
+        }
+
+        /// <summary>Emits BRK with an immediate of zero.</summary>
+        public void Brk() => WriteUInt32(0xd4200000u);
+
+        /// <summary>Emits CBZ (compare and branch if zero) with the given byte offset.</summary>
+        public void Cbz(Operand rt, int imm)
+        {
+            uint offsetField = EncodeSImm19_2(imm) << 5;
+
+            WriteInstructionAuto(0x34000000u | offsetField, rt);
+        }
+
+        /// <summary>Emits CBNZ (compare and branch if not zero) with the given byte offset.</summary>
+        public void Cbnz(Operand rt, int imm)
+        {
+            uint offsetField = EncodeSImm19_2(imm) << 5;
+
+            WriteInstructionAuto(0x35000000u | offsetField, rt);
+        }
+
+        /// <summary>Emits CLREX with the given CRm field (15 by default).</summary>
+        public void Clrex(int crm = 15)
+        {
+            uint crmField = EncodeUImm4(crm) << 8;
+
+            WriteUInt32(0xd503305fu | crmField);
+        }
+
+        /// <summary>Emits CLZ (count leading zeros).</summary>
+        public void Clz(Operand rd, Operand rn) => WriteInstructionAuto(0x5ac01000u, rd, rn);
+
+        /// <summary>Emits CMP, encoded as SUBS with the zero register as destination.</summary>
+        public void Cmp(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            var discard = new Operand(ZrRegister, RegisterType.Integer, rn.Type);
+
+            Subs(discard, rn, rm, shiftType, shiftAmount);
+        }
+
+        /// <summary>Emits CSEL (conditional select) with the condition placed at bit 12.</summary>
+        public void Csel(Operand rd, Operand rn, Operand rm, ArmCondition condition)
+        {
+            uint conditionField = (uint)condition << 12;
+
+            WriteInstructionBitwiseAuto(0x1a800000u | conditionField, rd, rn, rm);
+        }
+
+        /// <summary>
+        /// Emits CSET, encoded as CSINC of the zero register with itself under the inverted condition.
+        /// </summary>
+        public void Cset(Operand rd, ArmCondition condition)
+        {
+            // Flipping bit 0 of a condition code yields its inverse.
+            ArmCondition inverted = (ArmCondition)((int)condition ^ 1);
+            Operand zero = new Operand(ZrRegister, RegisterType.Integer, rd.Type);
+
+            Csinc(rd, zero, zero, inverted);
+        }
+
+        /// <summary>Emits CSINC (conditional select increment) with the condition placed at bit 12.</summary>
+        public void Csinc(Operand rd, Operand rn, Operand rm, ArmCondition condition)
+        {
+            uint conditionField = (uint)condition << 12;
+
+            WriteInstructionBitwiseAuto(0x1a800400u | conditionField, rd, rn, rm);
+        }
+
+        /// <summary>
+        /// Emits DMB (data memory barrier).
+        /// </summary>
+        /// <param name="option">Barrier option, placed in the 4-bit CRm field at bit 8; must be in the range 0-15</param>
+        public void Dmb(uint option)
+        {
+            // An option wider than 4 bits would spill into the fixed opcode bits above CRm
+            // and silently produce a different instruction, so it is rejected in debug builds
+            // and masked otherwise, like the fields in Mrs.
+            Debug.Assert(option <= 0xfu);
+
+            WriteUInt32(0xd50330bfu | ((option & 0xfu) << 8));
+        }
+
+        /// <summary>Emits EOR, using the bitmask immediate form for non-zero constants and the shifted register form otherwise.</summary>
+        public void Eor(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            const uint EorImmediate = 0x52000000u;
+            const uint EorShiftedRegister = 0x4a000000u;
+
+            WriteInstructionBitwiseAuto(EorImmediate, EorShiftedRegister, rd, rn, rm, shiftType, shiftAmount);
+        }
+
+        /// <summary>Emits EXTR (extract register from a pair), with <paramref name="imms"/> as the bit position field.</summary>
+        public void Extr(Operand rd, Operand rn, Operand rm, int imms)
+        {
+            uint instruction = 0x13800000u;
+
+            if (rd.Type == OperandType.I64)
+            {
+                // N bit, set for the 64-bit variant.
+                instruction |= 1u << 22;
+            }
+
+            instruction |= EncodeUImm6(imms) << 10;
+
+            WriteInstructionBitwiseAuto(instruction, rd, rn, rm);
+        }
+
+        /// <summary>Emits LDAXP (load-acquire exclusive pair); the size field is chosen from the type of <paramref name="rt"/>.</summary>
+        public void Ldaxp(Operand rt, Operand rt2, Operand rn)
+        {
+            uint size = rt.Type == OperandType.I64 ? 3u : 2u;
+
+            WriteInstruction(0x887f8000u | (size << 30), rt, rn, rt2);
+        }
+
+        /// <summary>Emits LDAXR (load-acquire exclusive register); the size field is chosen from the type of <paramref name="rt"/>.</summary>
+        public void Ldaxr(Operand rt, Operand rn)
+        {
+            uint size = rt.Type == OperandType.I64 ? 3u : 2u;
+
+            WriteInstruction(0x085ffc00u | (size << 30), rt, rn);
+        }
+
+        /// <summary>Emits LDAXRB (load-acquire exclusive byte).</summary>
+        public void Ldaxrb(Operand rt, Operand rn) => WriteInstruction(0x085ffc00u, rt, rn);
+
+        /// <summary>Emits LDAXRH (load-acquire exclusive halfword).</summary>
+        public void Ldaxrh(Operand rt, Operand rn)
+        {
+            const uint HalfwordSize = 1u << 30;
+
+            WriteInstruction(0x085ffc00u | HalfwordSize, rt, rn);
+        }
+
+        /// <summary>Emits LDP (load pair) with post-indexed immediate addressing.</summary>
+        public void LdpRiPost(Operand rt, Operand rt2, Operand rn, int imm)
+            => WriteInstruction(GetLdpStpInstruction(0x28c00000u, 0x2cc00000u, imm, rt.Type), rt, rn, rt2);
+
+        /// <summary>Emits LDP (load pair) with pre-indexed immediate addressing.</summary>
+        public void LdpRiPre(Operand rt, Operand rt2, Operand rn, int imm)
+            => WriteInstruction(GetLdpStpInstruction(0x29c00000u, 0x2dc00000u, imm, rt.Type), rt, rn, rt2);
+
+        /// <summary>Emits LDP (load pair) with a signed immediate offset and no write-back.</summary>
+        public void LdpRiUn(Operand rt, Operand rt2, Operand rn, int imm)
+            => WriteInstruction(GetLdpStpInstruction(0x29400000u, 0x2d400000u, imm, rt.Type), rt, rn, rt2);
+
+        /// <summary>Emits LDR (literal), loading from a PC-relative byte offset; bit 30 selects the 64-bit variant.</summary>
+        public void LdrLit(Operand rt, int offset)
+        {
+            uint offsetField = EncodeSImm19_2(offset) << 5;
+            uint sizeBit = rt.Type == OperandType.I64 ? 1u << 30 : 0u;
+
+            WriteInstruction(0x18000000u | offsetField | sizeBit, rt);
+        }
+
+        /// <summary>Emits LDR with post-indexed immediate addressing.</summary>
+        public void LdrRiPost(Operand rt, Operand rn, int imm)
+        {
+            uint opcode = GetLdrStrInstruction(0xb8400400u, 0x3c400400u, rt.Type);
+            uint offsetField = EncodeSImm9(imm) << 12;
+
+            WriteInstruction(opcode | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits LDR with pre-indexed immediate addressing.</summary>
+        public void LdrRiPre(Operand rt, Operand rn, int imm)
+        {
+            uint opcode = GetLdrStrInstruction(0xb8400c00u, 0x3c400c00u, rt.Type);
+            uint offsetField = EncodeSImm9(imm) << 12;
+
+            WriteInstruction(opcode | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits LDR with an unsigned immediate offset.</summary>
+        public void LdrRiUn(Operand rt, Operand rn, int imm)
+        {
+            uint opcode = GetLdrStrInstruction(0xb9400000u, 0x3d400000u, rt.Type);
+            uint offsetField = EncodeUImm12(imm, rt.Type) << 10;
+
+            WriteInstruction(opcode | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits LDR with a register offset, extended as requested and optionally shifted.</summary>
+        public void LdrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift)
+            => WriteInstructionLdrStrAuto(GetLdrStrInstruction(0xb8600800u, 0x3ce00800u, rt.Type), rt, rn, rm, extensionType, shift);
+
+        /// <summary>Emits LDRB with post-indexed immediate addressing.</summary>
+        public void LdrbRiPost(Operand rt, Operand rn, int imm)
+        {
+            uint offsetField = EncodeSImm9(imm) << 12;
+
+            WriteInstruction(0x38400400u | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits LDRB with pre-indexed immediate addressing.</summary>
+        public void LdrbRiPre(Operand rt, Operand rn, int imm)
+        {
+            uint offsetField = EncodeSImm9(imm) << 12;
+
+            WriteInstruction(0x38400c00u | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits LDRB with an unsigned immediate offset.</summary>
+        public void LdrbRiUn(Operand rt, Operand rn, int imm)
+        {
+            uint offsetField = EncodeUImm12(imm, 0) << 10;
+
+            WriteInstruction(0x39400000u | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits LDRH with post-indexed immediate addressing.</summary>
+        public void LdrhRiPost(Operand rt, Operand rn, int imm)
+        {
+            uint offsetField = EncodeSImm9(imm) << 12;
+
+            WriteInstruction(0x78400400u | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits LDRH with pre-indexed immediate addressing.</summary>
+        public void LdrhRiPre(Operand rt, Operand rn, int imm)
+        {
+            uint offsetField = EncodeSImm9(imm) << 12;
+
+            WriteInstruction(0x78400c00u | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits LDRH with an unsigned immediate offset.</summary>
+        public void LdrhRiUn(Operand rt, Operand rn, int imm)
+        {
+            uint offsetField = EncodeUImm12(imm, 1) << 10;
+
+            WriteInstruction(0x79400000u | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits LDUR (load with unscaled signed 9-bit offset).</summary>
+        public void Ldur(Operand rt, Operand rn, int imm)
+        {
+            uint opcode = GetLdrStrInstruction(0xb8400000u, 0x3c400000u, rt.Type);
+            uint offsetField = EncodeSImm9(imm) << 12;
+
+            WriteInstruction(opcode | offsetField, rt, rn);
+        }
+
+        /// <summary>
+        /// Emits a logical shift left. A constant amount is encoded as UBFM,
+        /// a register amount as LSLV.
+        /// </summary>
+        public void Lsl(Operand rd, Operand rn, Operand rm)
+        {
+            if (rm.Kind != OperandKind.Constant)
+            {
+                Lslv(rd, rn, rm);
+
+                return;
+            }
+
+            int lastBit = rd.Type == OperandType.I64 ? 63 : 31;
+            int amount = rm.AsInt32() & lastBit;
+
+            // LSL #n is UBFM with immr = (-n) mod width and imms = width - 1 - n.
+            Ubfm(rd, rn, (-amount) & lastBit, lastBit - amount);
+        }
+
+        /// <summary>Emits LSLV (logical shift left by register).</summary>
+        public void Lslv(Operand rd, Operand rn, Operand rm) => WriteInstructionBitwiseAuto(0x1ac02000u, rd, rn, rm);
+
+        /// <summary>
+        /// Emits a logical shift right. A constant amount is encoded as UBFM,
+        /// a register amount as LSRV.
+        /// </summary>
+        public void Lsr(Operand rd, Operand rn, Operand rm)
+        {
+            if (rm.Kind != OperandKind.Constant)
+            {
+                Lsrv(rd, rn, rm);
+
+                return;
+            }
+
+            int lastBit = rd.Type == OperandType.I64 ? 63 : 31;
+            int amount = rm.AsInt32() & lastBit;
+
+            Ubfm(rd, rn, amount, lastBit);
+        }
+
+        /// <summary>Emits LSRV (logical shift right by register).</summary>
+        public void Lsrv(Operand rd, Operand rn, Operand rm) => WriteInstructionBitwiseAuto(0x1ac02400u, rd, rn, rm);
+
+        /// <summary>Emits MADD (multiply-add): rd = ra + rn * rm.</summary>
+        public void Madd(Operand rd, Operand rn, Operand rm, Operand ra) => WriteInstructionAuto(0x1b000000u, rd, rn, rm, ra);
+
+        /// <summary>Emits MUL, encoded as MADD with the zero register as addend.</summary>
+        public void Mul(Operand rd, Operand rn, Operand rm)
+        {
+            var zeroAddend = new Operand(ZrRegister, RegisterType.Integer, rd.Type);
+
+            Madd(rd, rn, rm, zeroAddend);
+        }
+
+        /// <summary>Emits a register move, encoded as ORR of the zero register with <paramref name="rn"/>.</summary>
+        public void Mov(Operand rd, Operand rn)
+        {
+            Debug.Assert(rd.Type.IsInteger());
+
+            var zero = new Operand(ZrRegister, RegisterType.Integer, rd.Type);
+
+            Orr(rd, zero, rn);
+        }
+
+        /// <summary>
+        /// Emits a register move that also works with the stack pointer. When either register has
+        /// index 31 the move is encoded as ADD with an immediate of zero; otherwise a regular
+        /// <see cref="Mov(Operand, Operand)"/> is used.
+        /// </summary>
+        public void MovSp(Operand rd, Operand rn)
+        {
+            bool involvesSp = rd.GetRegister().Index == SpRegister || rn.GetRegister().Index == SpRegister;
+
+            if (!involvesSp)
+            {
+                Mov(rd, rn);
+
+                return;
+            }
+
+            Add(rd, rn, new Operand(rd.Type, 0), immForm: true);
+        }
+
+        /// <summary>
+        /// Loads a constant into <paramref name="rd"/> using the cheapest available form: a move from the
+        /// zero register for 0, a single ORR for bitmask-encodable values, and otherwise one MOVZ followed
+        /// by a MOVK for every further non-zero 16-bit chunk.
+        /// </summary>
+        public void Mov(Operand rd, ulong value)
+        {
+            if (value == 0)
+            {
+                Mov(rd, new Operand(ZrRegister, RegisterType.Integer, rd.Type));
+
+                return;
+            }
+
+            if (CodeGenCommon.TryEncodeBitMask(rd.Type, value, out _, out _, out _))
+            {
+                Orr(rd, new Operand(ZrRegister, RegisterType.Integer, rd.Type), new Operand(OperandKind.Constant, rd.Type, value));
+
+                return;
+            }
+
+            bool emittedMovz = false;
+
+            // Walk the value 16 bits at a time, from the least significant chunk upwards.
+            for (int hw = 0; value != 0; hw++, value >>= 16)
+            {
+                int chunk = (ushort)value;
+
+                if (chunk == 0)
+                {
+                    continue;
+                }
+
+                if (emittedMovz)
+                {
+                    Movk(rd, chunk, hw);
+                }
+                else
+                {
+                    // The first chunk uses MOVZ so that every other bit starts out as zero.
+                    Movz(rd, chunk, hw);
+                    emittedMovz = true;
+                }
+            }
+        }
+
+        /// <summary>Loads a 16-bit immediate with a single MOVZ into the lowest halfword.</summary>
+        public void Mov(Operand rd, int imm) => Movz(rd, imm, 0);
+
+        /// <summary>Emits MOVZ: writes a 16-bit immediate into halfword <paramref name="hw"/> and zeroes the rest.</summary>
+        public void Movz(Operand rd, int imm, int hw)
+        {
+            // hw must fit in 2 bits for 64-bit destinations and 1 bit for 32-bit ones.
+            Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw);
+
+            uint immField = EncodeUImm16(imm) << 5;
+            uint hwField = (uint)hw << 21;
+
+            WriteInstructionAuto(0x52800000u | immField | hwField, rd);
+        }
+
+        /// <summary>Emits MOVK: writes a 16-bit immediate into halfword <paramref name="hw"/> and keeps the rest.</summary>
+        public void Movk(Operand rd, int imm, int hw)
+        {
+            // hw must fit in 2 bits for 64-bit destinations and 1 bit for 32-bit ones.
+            Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw);
+
+            uint immField = EncodeUImm16(imm) << 5;
+            uint hwField = (uint)hw << 21;
+
+            WriteInstructionAuto(0x72800000u | immField | hwField, rd);
+        }
+
+        /// <summary>Emits MRS reading TPIDR_EL0 into <paramref name="rt"/>.</summary>
+        public void MrsTpidrEl0(Operand rt) => WriteInstruction(0xd53bd040u, rt);
+
+        /// <summary>Emits MRS reading TPIDRRO_EL0 into <paramref name="rt"/>.</summary>
+        public void MrsTpidrroEl0(Operand rt) => WriteInstruction(0xd53bd060u, rt);
+
+        /// <summary>
+        /// Emits MRS for an arbitrary system register. Every field is masked to its encodable width
+        /// before it is placed into the instruction.
+        /// </summary>
+        public void Mrs(Operand rt, uint o0, uint op1, uint crn, uint crm, uint op2)
+        {
+            uint systemRegister =
+                ((o0 & 1) << 19) |
+                ((op1 & 7) << 16) |
+                ((crn & 15) << 12) |
+                ((crm & 15) << 8) |
+                ((op2 & 7) << 5);
+
+            WriteInstruction(0xd5300000u | systemRegister, rt);
+        }
+
+        /// <summary>Emits MVN (bitwise NOT), encoded as ORN with the zero register as first source.</summary>
+        public void Mvn(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            var zero = new Operand(ZrRegister, RegisterType.Integer, rd.Type);
+
+            Orn(rd, zero, rn, shiftType, shiftAmount);
+        }
+
+        /// <summary>Emits NEG, encoded as SUB from the zero register.</summary>
+        public void Neg(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            var zero = new Operand(ZrRegister, RegisterType.Integer, rd.Type);
+
+            Sub(rd, zero, rn, shiftType, shiftAmount);
+        }
+
+        /// <summary>Emits ORN (bitwise OR NOT, shifted register).</summary>
+        public void Orn(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            const uint OrnShiftedRegister = 0x2a200000u;
+
+            WriteInstructionBitwiseAuto(OrnShiftedRegister, rd, rn, rm, shiftType, shiftAmount);
+        }
+
+        /// <summary>Emits ORR, using the bitmask immediate form for non-zero constants and the shifted register form otherwise.</summary>
+        public void Orr(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            const uint OrrImmediate = 0x32000000u;
+            const uint OrrShiftedRegister = 0x2a000000u;
+
+            WriteInstructionBitwiseAuto(OrrImmediate, OrrShiftedRegister, rd, rn, rm, shiftType, shiftAmount);
+        }
+
+        /// <summary>Emits RET, returning to the address in <paramref name="rn"/>.</summary>
+        public void Ret(Operand rn)
+        {
+            uint target = EncodeReg(rn) << 5;
+
+            WriteUInt32(0xd65f0000u | target);
+        }
+
+        /// <summary>Emits REV (byte reverse); bit 10 is additionally set for 64-bit destinations.</summary>
+        public void Rev(Operand rd, Operand rn)
+        {
+            uint instruction = 0x5ac00800u;
+
+            if (rd.Type == OperandType.I64)
+            {
+                instruction |= 1u << 10;
+            }
+
+            WriteInstructionAuto(instruction, rd, rn);
+        }
+
+        /// <summary>
+        /// Emits a rotate right. A constant amount is encoded as EXTR with both sources equal,
+        /// a register amount as RORV.
+        /// </summary>
+        public void Ror(Operand rd, Operand rn, Operand rm)
+        {
+            if (rm.Kind != OperandKind.Constant)
+            {
+                Rorv(rd, rn, rm);
+
+                return;
+            }
+
+            int lastBit = rd.Type == OperandType.I64 ? 63 : 31;
+
+            Extr(rd, rn, rn, rm.AsInt32() & lastBit);
+        }
+
+        /// <summary>Emits RORV (rotate right by register).</summary>
+        public void Rorv(Operand rd, Operand rn, Operand rm) => WriteInstructionBitwiseAuto(0x1ac02c00u, rd, rn, rm);
+
+        /// <summary>Emits SBFM (signed bitfield move); the N bit is set for 64-bit destinations.</summary>
+        public void Sbfm(Operand rd, Operand rn, int immr, int imms)
+        {
+            uint nBit = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+            uint immsField = EncodeUImm6(imms) << 10;
+            uint immrField = EncodeUImm6(immr) << 16;
+
+            WriteInstructionAuto(0x13000000u | nBit | immsField | immrField, rd, rn);
+        }
+
+        /// <summary>Emits SDIV (signed divide).</summary>
+        public void Sdiv(Operand rd, Operand rn, Operand rm) => WriteInstructionRm16Auto(0x1ac00c00u, rd, rn, rm);
+
+        /// <summary>Emits SMULH (signed multiply high).</summary>
+        public void Smulh(Operand rd, Operand rn, Operand rm) => WriteInstructionRm16(0x9b407c00u, rd, rn, rm);
+
+        /// <summary>
+        /// Emits STLXP (store-release exclusive pair); <paramref name="rs"/> receives the status and
+        /// the size field is chosen from the type of <paramref name="rt"/>.
+        /// </summary>
+        public void Stlxp(Operand rt, Operand rt2, Operand rn, Operand rs)
+        {
+            uint size = rt.Type == OperandType.I64 ? 3u : 2u;
+
+            WriteInstruction(0x88208000u | (size << 30), rt, rn, rs, rt2);
+        }
+
+        /// <summary>
+        /// Emits STLXR (store-release exclusive register); <paramref name="rs"/> receives the status and
+        /// the size field is chosen from the type of <paramref name="rt"/>.
+        /// </summary>
+        public void Stlxr(Operand rt, Operand rn, Operand rs)
+        {
+            uint size = rt.Type == OperandType.I64 ? 3u : 2u;
+
+            WriteInstructionRm16(0x0800fc00u | (size << 30), rt, rn, rs);
+        }
+
+        /// <summary>Emits STLXRB (store-release exclusive byte).</summary>
+        public void Stlxrb(Operand rt, Operand rn, Operand rs) => WriteInstructionRm16(0x0800fc00u, rt, rn, rs);
+
+        /// <summary>Emits STLXRH (store-release exclusive halfword).</summary>
+        public void Stlxrh(Operand rt, Operand rn, Operand rs)
+        {
+            const uint HalfwordSize = 1u << 30;
+
+            WriteInstructionRm16(0x0800fc00u | HalfwordSize, rt, rn, rs);
+        }
+
+        /// <summary>Emits STP (store pair) with post-indexed immediate addressing.</summary>
+        public void StpRiPost(Operand rt, Operand rt2, Operand rn, int imm)
+            => WriteInstruction(GetLdpStpInstruction(0x28800000u, 0x2c800000u, imm, rt.Type), rt, rn, rt2);
+
+        /// <summary>Emits STP (store pair) with pre-indexed immediate addressing.</summary>
+        public void StpRiPre(Operand rt, Operand rt2, Operand rn, int imm)
+            => WriteInstruction(GetLdpStpInstruction(0x29800000u, 0x2d800000u, imm, rt.Type), rt, rn, rt2);
+
+        /// <summary>Emits STP (store pair) with a signed immediate offset and no write-back.</summary>
+        public void StpRiUn(Operand rt, Operand rt2, Operand rn, int imm)
+            => WriteInstruction(GetLdpStpInstruction(0x29000000u, 0x2d000000u, imm, rt.Type), rt, rn, rt2);
+
+        /// <summary>Emits STR with post-indexed immediate addressing.</summary>
+        public void StrRiPost(Operand rt, Operand rn, int imm)
+        {
+            uint opcode = GetLdrStrInstruction(0xb8000400u, 0x3c000400u, rt.Type);
+            uint offsetField = EncodeSImm9(imm) << 12;
+
+            WriteInstruction(opcode | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits STR with pre-indexed immediate addressing.</summary>
+        public void StrRiPre(Operand rt, Operand rn, int imm)
+        {
+            uint opcode = GetLdrStrInstruction(0xb8000c00u, 0x3c000c00u, rt.Type);
+            uint offsetField = EncodeSImm9(imm) << 12;
+
+            WriteInstruction(opcode | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits STR with an unsigned immediate offset.</summary>
+        public void StrRiUn(Operand rt, Operand rn, int imm)
+        {
+            uint opcode = GetLdrStrInstruction(0xb9000000u, 0x3d000000u, rt.Type);
+            uint offsetField = EncodeUImm12(imm, rt.Type) << 10;
+
+            WriteInstruction(opcode | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits STR with a register offset, extended as requested and optionally shifted.</summary>
+        public void StrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift)
+            => WriteInstructionLdrStrAuto(GetLdrStrInstruction(0xb8200800u, 0x3ca00800u, rt.Type), rt, rn, rm, extensionType, shift);
+
+        /// <summary>Emits STRB with post-indexed immediate addressing.</summary>
+        public void StrbRiPost(Operand rt, Operand rn, int imm)
+        {
+            uint offsetField = EncodeSImm9(imm) << 12;
+
+            WriteInstruction(0x38000400u | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits STRB with pre-indexed immediate addressing.</summary>
+        public void StrbRiPre(Operand rt, Operand rn, int imm)
+        {
+            uint offsetField = EncodeSImm9(imm) << 12;
+
+            WriteInstruction(0x38000c00u | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits STRB with an unsigned immediate offset.</summary>
+        public void StrbRiUn(Operand rt, Operand rn, int imm)
+        {
+            uint offsetField = EncodeUImm12(imm, 0) << 10;
+
+            WriteInstruction(0x39000000u | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits STRH with post-indexed immediate addressing.</summary>
+        public void StrhRiPost(Operand rt, Operand rn, int imm)
+        {
+            uint offsetField = EncodeSImm9(imm) << 12;
+
+            WriteInstruction(0x78000400u | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits STRH with pre-indexed immediate addressing.</summary>
+        public void StrhRiPre(Operand rt, Operand rn, int imm)
+        {
+            uint offsetField = EncodeSImm9(imm) << 12;
+
+            WriteInstruction(0x78000c00u | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits STRH with an unsigned immediate offset.</summary>
+        public void StrhRiUn(Operand rt, Operand rn, int imm)
+        {
+            uint offsetField = EncodeUImm12(imm, 1) << 10;
+
+            WriteInstruction(0x79000000u | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits STUR (store with unscaled signed 9-bit offset).</summary>
+        public void Stur(Operand rt, Operand rn, int imm)
+        {
+            uint opcode = GetLdrStrInstruction(0xb8000000u, 0x3c000000u, rt.Type);
+            uint offsetField = EncodeSImm9(imm) << 12;
+
+            WriteInstruction(opcode | offsetField, rt, rn);
+        }
+
+        /// <summary>Emits SUB (extended register).</summary>
+        public void Sub(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0)
+        {
+            const uint SubExtended = 0x4b200000u;
+
+            WriteInstructionAuto(SubExtended, rd, rn, rm, extensionType, shiftAmount);
+        }
+
+        /// <summary>Emits SUB, using the immediate form for non-zero constants and the shifted register form otherwise.</summary>
+        public void Sub(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            const uint SubImmediate = 0x51000000u;
+            const uint SubShiftedRegister = 0x4b000000u;
+
+            WriteInstructionAuto(SubImmediate, SubShiftedRegister, rd, rn, rm, shiftType, shiftAmount);
+        }
+
+        /// <summary>Emits SUBS (flag-setting SUB), using the immediate form for non-zero constants and the shifted register form otherwise.</summary>
+        public void Subs(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            const uint SubsImmediate = 0x71000000u;
+            const uint SubsShiftedRegister = 0x6b000000u;
+
+            WriteInstructionAuto(SubsImmediate, SubsShiftedRegister, rd, rn, rm, shiftType, shiftAmount);
+        }
+
+        /// <summary>Emits SXTB (sign-extend byte), encoded as SBFM of bits 0 to 7.</summary>
+        public void Sxtb(Operand rd, Operand rn) => Sbfm(rd, rn, 0, 7);
+
+        /// <summary>Emits SXTH (sign-extend halfword), encoded as SBFM of bits 0 to 15.</summary>
+        public void Sxth(Operand rd, Operand rn) => Sbfm(rd, rn, 0, 15);
+
+        /// <summary>Emits SXTW (sign-extend word), encoded as SBFM of bits 0 to 31.</summary>
+        public void Sxtw(Operand rd, Operand rn) => Sbfm(rd, rn, 0, 31);
+
+        /// <summary>Emits TST, encoded as ANDS with the zero register as destination.</summary>
+        public void Tst(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+        {
+            var discard = new Operand(ZrRegister, RegisterType.Integer, rn.Type);
+
+            Ands(discard, rn, rm, shiftType, shiftAmount);
+        }
+
+        /// <summary>Emits UBFM (unsigned bitfield move); the N bit is set for 64-bit destinations.</summary>
+        public void Ubfm(Operand rd, Operand rn, int immr, int imms)
+        {
+            uint nBit = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+            uint immsField = EncodeUImm6(imms) << 10;
+            uint immrField = EncodeUImm6(immr) << 16;
+
+            WriteInstructionAuto(0x53000000u | nBit | immsField | immrField, rd, rn);
+        }
+
+        /// <summary>Emits UDIV (unsigned divide).</summary>
+        public void Udiv(Operand rd, Operand rn, Operand rm) => WriteInstructionRm16Auto(0x1ac00800u, rd, rn, rm);
+
+        /// <summary>
+        /// Emits UMOV, moving a vector element to a general-purpose register. The Q bit is set
+        /// when <paramref name="size"/> is 3.
+        /// </summary>
+        public void Umov(Operand rd, Operand rn, int index, int size)
+        {
+            uint instruction = 0x0e003c00u | (EncodeIndexSizeImm5(index, size) << 16);
+
+            if (size == 3)
+            {
+                instruction |= 1u << 30;
+            }
+
+            WriteInstruction(instruction, rd, rn);
+        }
+
+        /// <summary>Emits UMULH (unsigned multiply high).</summary>
+        public void Umulh(Operand rd, Operand rn, Operand rm) => WriteInstructionRm16(0x9bc07c00u, rd, rn, rm);
+
+        /// <summary>Emits UXTB (zero-extend byte), encoded as UBFM of bits 0 to 7.</summary>
+        public void Uxtb(Operand rd, Operand rn) => Ubfm(rd, rn, 0, 7);
+
+        /// <summary>Emits UXTH (zero-extend halfword), encoded as UBFM of bits 0 to 15.</summary>
+        public void Uxth(Operand rd, Operand rn) => Ubfm(rd, rn, 0, 15);
+
+        /// <summary>
+        /// Writes an arithmetic instruction, choosing between its immediate and shifted register encodings.
+        /// The immediate encoding is used when <paramref name="rm"/> is a constant that is either non-zero
+        /// or explicitly requested through <paramref name="immForm"/>.
+        /// </summary>
+        /// <param name="instI">Base opcode of the immediate encoding</param>
+        /// <param name="instR">Base opcode of the shifted register encoding</param>
+        /// <param name="rd">Destination operand; its type selects the 32 or 64-bit variant</param>
+        /// <param name="rn">First source operand</param>
+        /// <param name="rm">Second source operand, register or constant</param>
+        /// <param name="shiftType">Shift applied to <paramref name="rm"/> (register encoding only)</param>
+        /// <param name="shiftAmount">Shift amount (register encoding only, must be 0 for constants)</param>
+        /// <param name="immForm">Forces the immediate encoding even for a constant of zero</param>
+        private void WriteInstructionAuto(
+            uint instI,
+            uint instR,
+            Operand rd,
+            Operand rn,
+            Operand rm,
+            ArmShiftType shiftType = ArmShiftType.Lsl,
+            int shiftAmount = 0,
+            bool immForm = false)
+        {
+            if (rm.Kind == OperandKind.Constant && (rm.Value != 0 || immForm))
+            {
+                Debug.Assert(shiftAmount == 0);
+                int imm = rm.AsInt32();
+                // The constant must fit in 32 bits.
+                Debug.Assert((uint)imm == rm.Value);
+                // A multiple of 4096 is encoded as its upper part with the "shift left by 12" flag set.
+                if (imm != 0 && (imm & 0xfff) == 0)
+                {
+                    instI |= 1 << 22; // sh flag
+                    imm >>= 12;
+                }
+                WriteInstructionAuto(instI | (EncodeUImm12(imm, 0) << 10), rd, rn);
+            }
+            else
+            {
+                instR |= EncodeUImm6(shiftAmount) << 10;
+                instR |= (uint)shiftType << 22;
+
+                WriteInstructionRm16Auto(instR, rd, rn, rm);
+            }
+        }
+
+        /// <summary>
+        /// Writes an extended register instruction: the shift amount goes to bit 10 and the
+        /// extension type to bit 13; the operand size follows the type of <paramref name="rd"/>.
+        /// </summary>
+        private void WriteInstructionAuto(
+            uint instruction,
+            Operand rd,
+            Operand rn,
+            Operand rm,
+            ArmExtensionType extensionType,
+            int shiftAmount = 0)
+        {
+            // The cast to uint also rejects negative amounts.
+            Debug.Assert((uint)shiftAmount <= 4);
+
+            uint shiftField = (uint)shiftAmount << 10;
+            uint optionField = (uint)extensionType << 13;
+
+            WriteInstructionRm16Auto(instruction | shiftField | optionField, rd, rn, rm);
+        }
+
+        /// <summary>
+        /// Writes a logical instruction, choosing the bitmask immediate encoding when <paramref name="rm"/>
+        /// is a non-zero constant and the shifted register encoding otherwise.
+        /// </summary>
+        /// <param name="instI">Base opcode of the bitmask immediate encoding</param>
+        /// <param name="instR">Base opcode of the shifted register encoding</param>
+        private void WriteInstructionBitwiseAuto(
+            uint instI,
+            uint instR,
+            Operand rd,
+            Operand rn,
+            Operand rm,
+            ArmShiftType shiftType = ArmShiftType.Lsl,
+            int shiftAmount = 0)
+        {
+            bool useImmediate = rm.Kind == OperandKind.Constant && rm.Value != 0;
+
+            if (!useImmediate)
+            {
+                WriteInstructionBitwiseAuto(instR, rd, rn, rm, shiftType, shiftAmount);
+
+                return;
+            }
+
+            Debug.Assert(shiftAmount == 0);
+
+            bool encodable = CodeGenCommon.TryEncodeBitMask(rm, out int n, out int imms, out int immr);
+
+            // Callers are expected to pass only constants that have a bitmask encoding.
+            Debug.Assert(encodable);
+
+            uint fields = ((uint)imms << 10) | ((uint)immr << 16) | ((uint)n << 22);
+
+            WriteInstructionAuto(instI | fields, rd, rn);
+        }
+
+        /// <summary>
+        /// Writes a shifted register instruction: sets the sf bit for 64-bit destinations, places the
+        /// shift amount at bit 10 and the shift type at bit 22, with rm encoded at bit 16.
+        /// </summary>
+        private void WriteInstructionBitwiseAuto(
+            uint instruction,
+            Operand rd,
+            Operand rn,
+            Operand rm,
+            ArmShiftType shiftType = ArmShiftType.Lsl,
+            int shiftAmount = 0)
+        {
+            uint sizeBit = rd.Type == OperandType.I64 ? SfFlag : 0u;
+            uint amountField = EncodeUImm6(shiftAmount) << 10;
+            uint typeField = (uint)shiftType << 22;
+
+            WriteInstructionRm16(instruction | sizeBit | amountField | typeField, rd, rn, rm);
+        }
+
+        /// <summary>
+        /// Writes a register-offset load or store: bit 12 enables the offset shift, the extension
+        /// type goes to bit 13, and bit 30 is set when <paramref name="rd"/> is 64-bit.
+        /// </summary>
+        private void WriteInstructionLdrStrAuto(
+            uint instruction,
+            Operand rd,
+            Operand rn,
+            Operand rm,
+            ArmExtensionType extensionType,
+            bool shift)
+        {
+            uint shiftBit = shift ? 1u << 12 : 0u;
+            uint optionField = (uint)extensionType << 13;
+            uint sizeBit = rd.Type == OperandType.I64 ? 1u << 30 : 0u;
+
+            WriteInstructionRm16(instruction | shiftBit | optionField | sizeBit, rd, rn, rm);
+        }
+
+        /// <summary>Writes a one-register instruction, setting the sf bit when <paramref name="rd"/> is 64-bit.</summary>
+        private void WriteInstructionAuto(uint instruction, Operand rd)
+        {
+            uint sized = rd.Type == OperandType.I64 ? instruction | SfFlag : instruction;
+
+            WriteInstruction(sized, rd);
+        }
+
+        /// <summary>Writes a two-register instruction, setting the sf bit when <paramref name="rd"/> is 64-bit.</summary>
+        public void WriteInstructionAuto(uint instruction, Operand rd, Operand rn)
+        {
+            uint sized = rd.Type == OperandType.I64 ? instruction | SfFlag : instruction;
+
+            WriteInstruction(sized, rd, rn);
+        }
+
+        /// <summary>Writes a four-register instruction, setting the sf bit when <paramref name="rd"/> is 64-bit.</summary>
+        private void WriteInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra)
+        {
+            uint sized = rd.Type == OperandType.I64 ? instruction | SfFlag : instruction;
+
+            WriteInstruction(sized, rd, rn, rm, ra);
+        }
+
+        /// <summary>Writes an instruction with <paramref name="rd"/> encoded in the lowest bits.</summary>
+        public void WriteInstruction(uint instruction, Operand rd)
+        {
+            uint rdField = EncodeReg(rd);
+
+            WriteUInt32(instruction | rdField);
+        }
+
+        /// <summary>Writes an instruction with rd at bit 0 and rn at bit 5.</summary>
+        public void WriteInstruction(uint instruction, Operand rd, Operand rn)
+        {
+            uint rdField = EncodeReg(rd);
+            uint rnField = EncodeReg(rn) << 5;
+
+            WriteUInt32(instruction | rdField | rnField);
+        }
+
+        /// <summary>Writes an instruction with rd at bit 0, rn at bit 5 and rm at bit 10.</summary>
+        public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm)
+        {
+            uint rdField = EncodeReg(rd);
+            uint rnField = EncodeReg(rn) << 5;
+            uint rmField = EncodeReg(rm) << 10;
+
+            WriteUInt32(instruction | rdField | rnField | rmField);
+        }
+
+        /// <summary>Writes an instruction with rd at bit 0, rn at bit 5, ra at bit 10 and rm at bit 16.</summary>
+        public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra)
+        {
+            uint rdField = EncodeReg(rd);
+            uint rnField = EncodeReg(rn) << 5;
+            uint raField = EncodeReg(ra) << 10;
+            uint rmField = EncodeReg(rm) << 16;
+
+            WriteUInt32(instruction | rdField | rnField | raField | rmField);
+        }
+
+        // Writes a three-register instruction whose rm field starts at bit 16. The operand
+        // size is derived from rd only: when rd is a 64-bit integer operand, the SF bit
+        // (bit 31) is set in the encoding before it is emitted.
+        private void WriteInstructionRm16Auto(uint instruction, Operand rd, Operand rn, Operand rm)
+        {
+            uint sizeBit = rd.Type == OperandType.I64 ? SfFlag : 0u;
+
+            WriteInstructionRm16(instruction | sizeBit, rd, rn, rm);
+        }
+
+        // Emits the instruction with rd in bits [4:0], rn in bits [9:5] and rm in bits [20:16].
+        // No operand size bit is applied here.
+        public void WriteInstructionRm16(uint instruction, Operand rd, Operand rn, Operand rm)
+            => WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16));
+
+        // Builds the base encoding for a load/store pair. The integer or vector opcode is picked
+        // from the operand type, the size bits are applied, and imm is inserted at bit 15 as a
+        // signed 7-bit offset scaled by the access size.
+        private static uint GetLdpStpInstruction(uint intInst, uint vecInst, int imm, OperandType type)
+        {
+            if (!type.IsInteger())
+            {
+                // Vector size selector: FP32 -> 0, FP64 -> 1, any other type -> 2.
+                int opc = type switch
+                {
+                    OperandType.FP32 => 0,
+                    OperandType.FP64 => 1,
+                    _ => 2,
+                };
+
+                // The selector lives in bits [31:30]; the offset scale grows with it (2, 3 or 4).
+                uint vecOffset = EncodeSImm7(imm, 2 + opc) << 15;
+
+                return vecInst | ((uint)opc << 30) | vecOffset;
+            }
+
+            // Integer pairs: I64 sets SfFlag and scales the offset by 8 bytes, I32 by 4 bytes.
+            bool is64Bit = type == OperandType.I64;
+            int scale = is64Bit ? 3 : 2;
+            uint encoding = intInst;
+
+            if (is64Bit)
+            {
+                encoding |= SfFlag;
+            }
+
+            // Insert the scaled offset into the imm7 field.
+            encoding |= EncodeSImm7(imm, scale) << 15;
+
+            return encoding;
+        }
+
+        // Selects the base encoding for a single register load/store and ORs in the bits that
+        // depend on the operand type: bit 30 for I64, bit 23 for V128, and the value 2 (FP32)
+        // or 3 (remaining types) in bits [31:30].
+        private static uint GetLdrStrInstruction(uint intInst, uint vecInst, OperandType type)
+        {
+            if (type.IsInteger())
+            {
+                // I32 uses the integer encoding unchanged.
+                return type == OperandType.I64 ? intInst | (1u << 30) : intInst;
+            }
+
+            // Non-integer types start from the vector encoding.
+            uint typeBits;
+
+            switch (type)
+            {
+                case OperandType.V128:
+                    typeBits = 1u << 23;
+                    break;
+                case OperandType.FP32:
+                    typeBits = 2u << 30;
+                    break;
+                default:
+                    typeBits = 3u << 30;
+                    break;
+            }
+
+            return vecInst | typeBits;
+        }
+
+        private static uint EncodeIndexSizeImm5(int index, int size)
+        {
+            Debug.Assert(size >= 0 && size < 4); // Keeps the size marker bit within bits [3:0].
+            Debug.Assert((uint)index < (16u >> size), $"Invalid index {index} and size {size} combination.");
+            return (1u << size) | ((uint)index << (size + 1)); // One-hot size marker, index above it.
+        }
+
+        // Returns value >> scale as a 7-bit two's complement field; debug builds assert it round-trips.
+        private static uint EncodeSImm7(int value, int scale)
+        {
+            Debug.Assert(((value >> scale) << 25) >> (25 - scale) == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
+            return (uint)(value >> scale) & 0x7f;
+        }
+
+        // Returns value as a 9-bit two's complement field; debug builds assert it round-trips.
+        private static uint EncodeSImm9(int value)
+        {
+            Debug.Assert((value << 23) >> 23 == value, $"Failed to encode constant 0x{value:X}.");
+            return (uint)value & 0x1ff;
+        }
+
+        // Returns value >> 2 as a 19-bit two's complement field; debug builds assert it round-trips.
+        private static uint EncodeSImm19_2(int value)
+        {
+            Debug.Assert(((value >> 2) << 13) >> 11 == value, $"Failed to encode constant 0x{value:X}.");
+            return (uint)(value >> 2) & 0x7ffff;
+        }
+
+        // Returns value >> 2 as a 26-bit two's complement field; debug builds assert it round-trips.
+        private static uint EncodeSImm26_2(int value)
+        {
+            Debug.Assert(((value >> 2) << 6) >> 4 == value, $"Failed to encode constant 0x{value:X}.");
+            return (uint)(value >> 2) & 0x3ffffff;
+        }
+
+        // Returns value as a 4-bit unsigned field; debug builds assert that it fits.
+        private static uint EncodeUImm4(int value)
+        {
+            Debug.Assert((value & ~0xf) == 0, $"Failed to encode constant 0x{value:X}.");
+            return (uint)value & 0xf;
+        }
+
+        // Returns value as a 6-bit unsigned field; debug builds assert that it fits.
+        private static uint EncodeUImm6(int value)
+        {
+            Debug.Assert((value & ~0x3f) == 0, $"Failed to encode constant 0x{value:X}.");
+            return (uint)value & 0x3f;
+        }
+
+        // Encodes value as a 12-bit unsigned field, scaled according to the operand type
+        // (see GetScaleForType).
+        private static uint EncodeUImm12(int value, OperandType type)
+            => EncodeUImm12(value, GetScaleForType(type));
+
+        // Returns value >> scale as a 12-bit unsigned field; debug builds assert it round-trips.
+        private static uint EncodeUImm12(int value, int scale)
+        {
+            Debug.Assert((((value >> scale) & 0xfff) << scale) == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
+            return (uint)(value >> scale) & 0xfff;
+        }
+
+        // Returns value as a 16-bit unsigned field; debug builds assert that it fits.
+        private static uint EncodeUImm16(int value)
+        {
+            Debug.Assert((value & ~0xffff) == 0, $"Failed to encode constant 0x{value:X}.");
+            return (uint)value & 0xffff;
+        }
+
+        // Returns the 5-bit register number for an operand. A constant whose value is zero maps
+        // to register 31 (ZrRegister); any other operand must be a register with index below 32.
+        private static uint EncodeReg(Operand reg)
+        {
+            bool isZeroConstant = reg.Kind == OperandKind.Constant && reg.Value == 0;
+            uint regIndex = isZeroConstant ? (uint)ZrRegister : (uint)reg.GetRegister().Index;
+
+            Debug.Assert(isZeroConstant || reg.Kind == OperandKind.Register);
+            Debug.Assert(regIndex < 32);
+
+            return regIndex;
+        }
+
+        // Returns the shift amount (log2 of the size in bytes) used to scale offsets for the given
+        // operand type. Throws ArgumentException for any type not listed.
+        private static int GetScaleForType(OperandType type)
+        {
+            return type switch
+            {
+                OperandType.I32 or OperandType.FP32 => 2,
+                OperandType.I64 or OperandType.FP64 => 3,
+                OperandType.V128 => 4,
+                _ => throw new ArgumentException($"Invalid type {type}."),
+            };
+        }
+
+        // Appends one 32-bit word to the end of the code list.
+        // Every Write* method funnels through here.
+        private void WriteUInt32(uint value)
+            => _code.Add(value);
+
+        // Returns a copy of all words written so far, in emission order.
+        // The returned array is independent of the internal list.
+        public uint[] GetCode()
+            => _code.ToArray();
+    }
+}
diff --git a/src/Ryujinx.Cpu/Nce/Arm64/CodeGenCommon.cs b/src/Ryujinx.Cpu/Nce/Arm64/CodeGenCommon.cs
new file mode 100644
index 000000000..205929760
--- /dev/null
+++ b/src/Ryujinx.Cpu/Nce/Arm64/CodeGenCommon.cs
@@ -0,0 +1,66 @@
+using System.Numerics;
+
+namespace Ryujinx.Cpu.Nce.Arm64
+{
+    static class CodeGenCommon
+    {
+        // Tries to encode the value of a constant operand as a logical (bitmask) immediate.
+        public static bool TryEncodeBitMask(Operand operand, out int immN, out int immS, out int immR)
+        {
+            return TryEncodeBitMask(operand.Type, operand.Value, out immN, out immS, out immR);
+        }
+
+        // For I32 only the low 32 bits of value are significant. They are mirrored into the upper
+        // half, discarding stale upper bits (e.g. left by a sign-extended constant) first.
+        public static bool TryEncodeBitMask(OperandType type, ulong value, out int immN, out int immS, out int immR)
+        {
+            if (type == OperandType.I32)
+            {
+                value = (uint)value | (value << 32);
+            }
+
+            return TryEncodeBitMask(value, out immN, out immS, out immR);
+        }
+
+        // Returns false, with all outputs zeroed, when value is not encodable as a bitmask immediate.
+        public static bool TryEncodeBitMask(ulong value, out int immN, out int immS, out int immR)
+        {
+            immN = 0;
+            immS = 0;
+            immR = 0;
+
+            // Some special values also can't be encoded:
+            // 0 can't be encoded because we need to subtract 1 from onesCount (which would become negative if 0).
+            // A value with all bits set can't be encoded because it is reserved according to the spec, because:
+            // Any value AND all ones will be equal itself, so it's effectively a no-op.
+            // Any value OR all ones will be equal all ones, so one can just use MOV.
+            // Any value XOR all ones will be equal its inverse, so one can just use MVN.
+            if (value == 0 || value == ulong.MaxValue)
+            {
+                return false;
+            }
+
+            // Normalize value, rotating it such that the LSB is 1: Ensures we get a complete element that has not
+            // been cut-in-half across the word boundary.
+            int rotation = BitOperations.TrailingZeroCount(value & (value + 1));
+            ulong rotatedValue = ulong.RotateRight(value, rotation);
+
+            // Now that we have a complete element in the LSB with the LSB = 1, determine size and number of ones
+            // in element.
+            int elementSize = BitOperations.TrailingZeroCount(rotatedValue & (rotatedValue + 1));
+            int onesInElement = BitOperations.TrailingZeroCount(~rotatedValue);
+
+            // Check the value is repeating; also ensures element size is a power of two.
+            if (ulong.RotateRight(value, elementSize) != value)
+            {
+                return false;
+            }
+
+            immN = (elementSize >> 6) & 1;
+            immS = (((~elementSize + 1) << 1) | (onesInElement - 1)) & 0x3f;
+            immR = (elementSize - rotation) & (elementSize - 1);
+
+            return true;
+        }
+    }
+}
diff --git a/src/Ryujinx.Cpu/Nce/Arm64/Operand.cs b/src/Ryujinx.Cpu/Nce/Arm64/Operand.cs
new file mode 100644
index 000000000..d5d36b70f
--- /dev/null
+++ b/src/Ryujinx.Cpu/Nce/Arm64/Operand.cs
@@ -0,0 +1,40 @@
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Cpu.Nce.Arm64
+{
+    // Operand value: a kind tag, a type tag and a 64-bit payload. For register operands the
+    // payload packs the register type from bit 24 upwards and the index in the low 24 bits.
+    struct Operand
+    {
+        public readonly OperandKind Kind { get; }
+        public readonly OperandType Type { get; }
+        public readonly ulong Value { get; }
+
+        // Builds an operand directly from its three parts.
+        public Operand(OperandKind kind, OperandType type, ulong value)
+            => (Kind, Type, Value) = (kind, type, value);
+
+        // Register operand: Value becomes (regType << 24) | index.
+        public Operand(int index, RegisterType regType, OperandType type)
+            : this(OperandKind.Register, type, (ulong)((int)regType << 24 | index)) { }
+
+        // Constant operand: Value carries the constant unchanged.
+        public Operand(OperandType type, ulong value)
+            : this(OperandKind.Constant, type, value) { }
+
+        // Unpacks the register stored in Value; debug builds assert that Kind is Register.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public readonly Register GetRegister()
+        {
+            Debug.Assert(Kind == OperandKind.Register);
+
+            int index = (int)Value & 0xffffff;
+            RegisterType regType = (RegisterType)(Value >> 24);
+            return new Register(index, regType);
+        }
+
+        // Returns Value converted to int with a plain cast.
+        public readonly int AsInt32() => (int)Value;
+    }
+}
diff --git a/src/Ryujinx.Cpu/Nce/Arm64/OperandKind.cs b/src/Ryujinx.Cpu/Nce/Arm64/OperandKind.cs
new file mode 100644
index 000000000..4a822694b
--- /dev/null
+++ b/src/Ryujinx.Cpu/Nce/Arm64/OperandKind.cs
@@ -0,0 +1,10 @@
+namespace Ryujinx.Cpu.Nce.Arm64
+{
+    enum OperandKind // Tells how an Operand's Value field is to be interpreted.
+    {
+        None, // Default value (0).
+        Constant, // Value holds the constant; set by the Operand(OperandType, ulong) constructor.
+        Label, // NOTE(review): not referenced in the code visible here; presumably a branch target - confirm.
+        Register, // Value packs register type and index; set by the Operand(int, RegisterType, OperandType) constructor.
+    }
+}
diff --git a/src/Ryujinx.Cpu/Nce/Arm64/OperandType.cs b/src/Ryujinx.Cpu/Nce/Arm64/OperandType.cs
new file mode 100644
index 000000000..27e72815d
--- /dev/null
+++ b/src/Ryujinx.Cpu/Nce/Arm64/OperandType.cs
@@ -0,0 +1,36 @@
+using System;
+
+namespace Ryujinx.Cpu.Nce.Arm64
+{
+    enum OperandType // Value type of an operand; sizes below are those returned by GetSizeInBytes.
+    {
+        None, // Default value (0); GetSizeInBytes throws for it.
+        I32, // Integer type per IsInteger, 4 bytes.
+        I64, // Integer type per IsInteger, 8 bytes.
+        FP32, // Non-integer type, 4 bytes.
+        FP64, // Non-integer type, 8 bytes.
+        V128, // Non-integer type, 16 bytes.
+    }
+
+    static class OperandTypeExtensions
+    {
+        // True only for the two integer types, I32 and I64.
+        public static bool IsInteger(this OperandType type)
+        {
+            return type is OperandType.I32 or OperandType.I64;
+        }
+
+        // Size in bytes of a value of the given type. Throws InvalidOperationException for
+        // any type without a listed size (such as None).
+        public static int GetSizeInBytes(this OperandType type)
+        {
+            return type switch
+            {
+                OperandType.I32 or OperandType.FP32 => 4,
+                OperandType.I64 or OperandType.FP64 => 8,
+                OperandType.V128 => 16,
+                _ => throw new InvalidOperationException($"Invalid operand type \"{type}\"."),
+            };
+        }
+    }
+}
diff --git a/src/Ryujinx.Cpu/Nce/Arm64/Register.cs b/src/Ryujinx.Cpu/Nce/Arm64/Register.cs
new file mode 100644
index 000000000..8277fda7a
--- /dev/null
+++ b/src/Ryujinx.Cpu/Nce/Arm64/Register.cs
@@ -0,0 +1,43 @@
+using System;
+
+namespace Ryujinx.Cpu.Nce.Arm64
+{
+    // Identifies a register by its index and register type.
+    // Two registers are equal when both fields match.
+    readonly struct Register : IEquatable<Register>
+    {
+        public int Index { get; }
+
+        public RegisterType Type { get; }
+
+        public Register(int index, RegisterType type)
+        {
+            Type = type;
+            Index = index;
+        }
+
+        // Low 16 bits of the index combined with the type shifted into the upper half.
+        public override int GetHashCode()
+        {
+            int typeBits = (int)Type << 16;
+
+            return (ushort)Index | typeBits;
+        }
+
+        public static bool operator ==(Register x, Register y) => x.Equals(y);
+
+        public static bool operator !=(Register x, Register y) => !x.Equals(y);
+
+        // Untyped comparison: equal only to another Register with the same fields.
+        public override bool Equals(object obj)
+        {
+            return obj is Register other && Equals(other);
+        }
+
+        // Typed comparison used by the operators and by IEquatable<Register>.
+        public bool Equals(Register other)
+        {
+            return Index == other.Index && Type == other.Type;
+        }
+    }
+}
diff --git a/src/Ryujinx.Cpu/Nce/Arm64/RegisterSaveRestore.cs b/src/Ryujinx.Cpu/Nce/Arm64/RegisterSaveRestore.cs
new file mode 100644
index 000000000..aade05977
--- /dev/null
+++ b/src/Ryujinx.Cpu/Nce/Arm64/RegisterSaveRestore.cs
@@ -0,0 +1,220 @@
+using System.Numerics;
+
+namespace Ryujinx.Cpu.Nce.Arm64
+{
+    // Emits the prologue and epilogue that save and restore a set of callee saved registers
+    // on the stack. Integer registers (8 bytes each) occupy the lowest addresses, followed by
+    // the vector registers (sized by vecType); FP and LR are pushed below them if hasCall is set.
+    readonly struct RegisterSaveRestore
+    {
+        private const int FpRegister = 29;
+        private const int LrRegister = 30;
+
+        // Exclusive limits on the region size that can be folded into a pre/post-indexed access.
+        // Single register forms take a signed 9-bit byte offset (-256..255), while pair forms take
+        // a signed 7-bit offset scaled by the register size (-512..504 for 8 byte registers).
+        private const int Encodable9BitsOffsetLimit = 0x100;
+        private const int EncodablePairOffsetLimit = 0x200;
+
+        private readonly int _intMask;
+        private readonly int _vecMask;
+        private readonly OperandType _vecType;
+        private readonly bool _hasCall;
+
+        public RegisterSaveRestore(int intMask, int vecMask = 0, OperandType vecType = OperandType.FP64, bool hasCall = false)
+        {
+            _intMask = intMask;
+            _vecMask = vecMask;
+            _vecType = vecType;
+            _hasCall = hasCall;
+        }
+
+        // Writes the stores for every register in both masks, allocating the save region with
+        // the first store, then (if hasCall is set) pushes FP/LR and emits MovSp(FP, SP).
+        public void WritePrologue(Assembler asm)
+        {
+            int intMask = _intMask;
+            int vecMask = _vecMask;
+
+            int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask);
+            int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask);
+
+            int calleeSaveRegionSize = Align16(intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * _vecType.GetSizeInBytes());
+            int offset = 0;
+
+            WritePrologueCalleeSavesPreIndexed(asm, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64);
+
+            // An odd number of 8 byte integer slots would leave the 16 byte vector slots misaligned,
+            // so skip 8 bytes (the Align16 rounding above already accounts for them).
+            if (_vecType == OperandType.V128 && (intCalleeSavedRegsCount & 1) != 0)
+            {
+                offset += 8;
+            }
+
+            WritePrologueCalleeSavesPreIndexed(asm, ref vecMask, ref offset, calleeSaveRegionSize, _vecType);
+
+            if (_hasCall)
+            {
+                Operand rsp = Register(Assembler.SpRegister);
+                asm.StpRiPre(Register(FpRegister), Register(LrRegister), rsp, -16);
+                asm.MovSp(Register(FpRegister), rsp);
+            }
+        }
+
+        // Stores the registers in mask at increasing offsets from SP: a single store first when
+        // the count is odd, then pairs. The store at offset 0 also allocates the whole region.
+        private static void WritePrologueCalleeSavesPreIndexed(
+            Assembler asm,
+            ref int mask,
+            ref int offset,
+            int calleeSaveRegionSize,
+            OperandType type)
+        {
+            if ((BitOperations.PopCount((uint)mask) & 1) != 0)
+            {
+                int reg = BitOperations.TrailingZeroCount(mask);
+                mask &= ~(1 << reg);
+
+                if (offset != 0)
+                {
+                    asm.StrRiUn(Register(reg, type), Register(Assembler.SpRegister), offset);
+                }
+                else if (calleeSaveRegionSize < Encodable9BitsOffsetLimit)
+                {
+                    asm.StrRiPre(Register(reg, type), Register(Assembler.SpRegister), -calleeSaveRegionSize);
+                }
+                else
+                {
+                    // Too large for the pre-index immediate: adjust SP separately.
+                    asm.Sub(Register(Assembler.SpRegister), Register(Assembler.SpRegister), new Operand(OperandType.I64, (ulong)calleeSaveRegionSize));
+                    asm.StrRiUn(Register(reg, type), Register(Assembler.SpRegister), 0);
+                }
+
+                offset += type.GetSizeInBytes();
+            }
+
+            while (mask != 0)
+            {
+                int reg = BitOperations.TrailingZeroCount(mask);
+                mask &= ~(1 << reg);
+
+                int reg2 = BitOperations.TrailingZeroCount(mask);
+                mask &= ~(1 << reg2);
+
+                if (offset != 0)
+                {
+                    asm.StpRiUn(Register(reg, type), Register(reg2, type), Register(Assembler.SpRegister), offset);
+                }
+                else if (calleeSaveRegionSize < EncodablePairOffsetLimit)
+                {
+                    asm.StpRiPre(Register(reg, type), Register(reg2, type), Register(Assembler.SpRegister), -calleeSaveRegionSize);
+                }
+                else
+                {
+                    asm.Sub(Register(Assembler.SpRegister), Register(Assembler.SpRegister), new Operand(OperandType.I64, (ulong)calleeSaveRegionSize));
+                    asm.StpRiUn(Register(reg, type), Register(reg2, type), Register(Assembler.SpRegister), 0);
+                }
+
+                offset += type.GetSizeInBytes() * 2;
+            }
+        }
+
+        // Writes the loads that mirror WritePrologue in reverse order: FP/LR first (if hasCall is
+        // set), then vector registers, then integer registers; the last load releases the region.
+        public void WriteEpilogue(Assembler asm)
+        {
+            int intMask = _intMask;
+            int vecMask = _vecMask;
+
+            int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask);
+            int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask);
+
+            bool misalignedVector = _vecType == OperandType.V128 && (intCalleeSavedRegsCount & 1) != 0;
+            int offset = intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * _vecType.GetSizeInBytes();
+
+            if (misalignedVector)
+            {
+                offset += 8;
+            }
+
+            int calleeSaveRegionSize = Align16(offset);
+
+            if (_hasCall)
+            {
+                asm.LdpRiPost(Register(FpRegister), Register(LrRegister), Register(Assembler.SpRegister), 16);
+            }
+
+            WriteEpilogueCalleeSavesPostIndexed(asm, ref vecMask, ref offset, calleeSaveRegionSize, _vecType);
+
+            if (misalignedVector)
+            {
+                offset -= 8;
+            }
+
+            WriteEpilogueCalleeSavesPostIndexed(asm, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64);
+        }
+
+        // Loads the registers in mask at decreasing offsets from SP, highest register first, in pairs
+        // with a final single load when the count is odd. The load at offset 0 releases the region.
+        private static void WriteEpilogueCalleeSavesPostIndexed(
+            Assembler asm,
+            ref int mask,
+            ref int offset,
+            int calleeSaveRegionSize,
+            OperandType type)
+        {
+            while (mask != 0)
+            {
+                int reg = HighestBitSet(mask);
+                mask &= ~(1 << reg);
+
+                if (mask != 0)
+                {
+                    int reg2 = HighestBitSet(mask);
+                    mask &= ~(1 << reg2);
+
+                    offset -= type.GetSizeInBytes() * 2;
+
+                    if (offset != 0)
+                    {
+                        asm.LdpRiUn(Register(reg2, type), Register(reg, type), Register(Assembler.SpRegister), offset);
+                    }
+                    else if (calleeSaveRegionSize < EncodablePairOffsetLimit)
+                    {
+                        asm.LdpRiPost(Register(reg2, type), Register(reg, type), Register(Assembler.SpRegister), calleeSaveRegionSize);
+                    }
+                    else
+                    {
+                        asm.LdpRiUn(Register(reg2, type), Register(reg, type), Register(Assembler.SpRegister), 0);
+                        asm.Add(Register(Assembler.SpRegister), Register(Assembler.SpRegister), new Operand(OperandType.I64, (ulong)calleeSaveRegionSize));
+                    }
+                }
+                else
+                {
+                    offset -= type.GetSizeInBytes();
+
+                    if (offset != 0)
+                    {
+                        asm.LdrRiUn(Register(reg, type), Register(Assembler.SpRegister), offset);
+                    }
+                    else if (calleeSaveRegionSize < Encodable9BitsOffsetLimit)
+                    {
+                        asm.LdrRiPost(Register(reg, type), Register(Assembler.SpRegister), calleeSaveRegionSize);
+                    }
+                    else
+                    {
+                        // Too large for the post-index immediate: adjust SP separately.
+                        asm.LdrRiUn(Register(reg, type), Register(Assembler.SpRegister), 0);
+                        asm.Add(Register(Assembler.SpRegister), Register(Assembler.SpRegister), new Operand(OperandType.I64, (ulong)calleeSaveRegionSize));
+                    }
+                }
+            }
+        }
+
+        private static int HighestBitSet(int value) => 31 - BitOperations.LeadingZeroCount((uint)value);
+
+        private static Operand Register(int register, OperandType type = OperandType.I64) => new Operand(register, RegisterType.Integer, type);
+
+        private static int Align16(int value) => (value + 0xf) & ~0xf;
+    }
+}
\ No newline at end of file
diff --git a/src/Ryujinx.Cpu/Nce/Arm64/RegisterType.cs b/src/Ryujinx.Cpu/Nce/Arm64/RegisterType.cs
new file mode 100644
index 000000000..f53090e66
--- /dev/null
+++ b/src/Ryujinx.Cpu/Nce/Arm64/RegisterType.cs
@@ -0,0 +1,8 @@
+namespace Ryujinx.Cpu.Nce.Arm64
+{
+    enum RegisterType // Register class; a register Operand stores it from bit 24 upwards of its Value.
+    {
+        Integer, // Value 0; the only type the RegisterSaveRestore.Register helper produces.
+        Vector, // NOTE(review): not referenced in the code visible here; presumably SIMD/FP registers - confirm.
+    }
+}
diff --git a/src/Ryujinx.Cpu/Nce/NceAsmTable.cs b/src/Ryujinx.Cpu/Nce/NceAsmTable.cs
deleted file mode 100644
index 72e2d9780..000000000
--- a/src/Ryujinx.Cpu/Nce/NceAsmTable.cs
+++ /dev/null
@@ -1,409 +0,0 @@
-using System;
-
-namespace Ryujinx.Cpu.Nce
-{
-    static class NceAsmTable
-    {
-        public static uint[] GetTpidrEl0Code = new uint[]
-        {
-            GetMrsTpidrEl0(0), // mrs x0, tpidr_el0
-            0xd65f03c0u, // ret
-        };
-
-        public static uint[] ThreadStartCode = new uint[]
-        {
-            0xa9ae53f3u, // stp x19, x20, [sp, #-288]!
-            0xa9015bf5u, // stp x21, x22, [sp, #16]
-            0xa90263f7u, // stp x23, x24, [sp, #32]
-            0xa9036bf9u, // stp x25, x26, [sp, #48]
-            0xa90473fbu, // stp x27, x28, [sp, #64]
-            0xa9057bfdu, // stp x29, x30, [sp, #80]
-            0x6d0627e8u, // stp d8, d9, [sp, #96]
-            0x6d072feau, // stp d10, d11, [sp, #112]
-            0x6d0837ecu, // stp d12, d13, [sp, #128]
-            0x6d093feeu, // stp d14, d15, [sp, #144]
-            0x6d0a47f0u, // stp d16, d17, [sp, #160]
-            0x6d0b4ff2u, // stp d18, d19, [sp, #176]
-            0x6d0c57f4u, // stp d20, d21, [sp, #192]
-            0x6d0d5ff6u, // stp d22, d23, [sp, #208]
-            0x6d0e67f8u, // stp d24, d25, [sp, #224]
-            0x6d0f6ffau, // stp d26, d27, [sp, #240]
-            0x6d1077fcu, // stp d28, d29, [sp, #256]
-            0x6d117ffeu, // stp d30, d31, [sp, #272]
-            0xb9031c1fu, // str wzr, [x0, #796]
-            0x910003e1u, // mov x1, sp
-            0xf9019001u, // str x1, [x0, #800]
-            0xa9410c02u, // ldp x2, x3, [x0, #16]
-            0xa9421404u, // ldp x4, x5, [x0, #32]
-            0xa9431c06u, // ldp x6, x7, [x0, #48]
-            0xa9442408u, // ldp x8, x9, [x0, #64]
-            0xa9452c0au, // ldp x10, x11, [x0, #80]
-            0xa946340cu, // ldp x12, x13, [x0, #96]
-            0xa9473c0eu, // ldp x14, x15, [x0, #112]
-            0xa9484410u, // ldp x16, x17, [x0, #128]
-            0xa9494c12u, // ldp x18, x19, [x0, #144]
-            0xa94a5414u, // ldp x20, x21, [x0, #160]
-            0xa94b5c16u, // ldp x22, x23, [x0, #176]
-            0xa94c6418u, // ldp x24, x25, [x0, #192]
-            0xa94d6c1au, // ldp x26, x27, [x0, #208]
-            0xa94e741cu, // ldp x28, x29, [x0, #224]
-            0xad480400u, // ldp q0, q1, [x0, #256]
-            0xad490c02u, // ldp q2, q3, [x0, #288]
-            0xad4a1404u, // ldp q4, q5, [x0, #320]
-            0xad4b1c06u, // ldp q6, q7, [x0, #352]
-            0xad4c2408u, // ldp q8, q9, [x0, #384]
-            0xad4d2c0au, // ldp q10, q11, [x0, #416]
-            0xad4e340cu, // ldp q12, q13, [x0, #448]
-            0xad4f3c0eu, // ldp q14, q15, [x0, #480]
-            0xad504410u, // ldp q16, q17, [x0, #512]
-            0xad514c12u, // ldp q18, q19, [x0, #544]
-            0xad525414u, // ldp q20, q21, [x0, #576]
-            0xad535c16u, // ldp q22, q23, [x0, #608]
-            0xad546418u, // ldp q24, q25, [x0, #640]
-            0xad556c1au, // ldp q26, q27, [x0, #672]
-            0xad56741cu, // ldp q28, q29, [x0, #704]
-            0xad577c1eu, // ldp q30, q31, [x0, #736]
-            0xa94f041eu, // ldp x30, x1, [x0, #240]
-            0x9100003fu, // mov sp, x1
-            0xa9400400u, // ldp x0, x1, [x0]
-            0xd61f03c0u, // br x30
-        };
-
-        public static uint[] ExceptionHandlerEntryCode = new uint[]
-        {
-            0xa9bc53f3u, // stp x19, x20, [sp, #-64]!
-            0xa9015bf5u, // stp x21, x22, [sp, #16]
-            0xa90263f7u, // stp x23, x24, [sp, #32]
-            0xf9001bf9u, // str x25, [sp, #48]
-            0xaa0003f3u, // mov x19, x0
-            0xaa0103f4u, // mov x20, x1
-            0xaa0203f5u, // mov x21, x2
-            0x910003f6u, // mov x22, sp
-            0xaa1e03f7u, // mov x23, x30
-            0xd2800018u, // mov x24, #0x0
-            0xf2a00018u, // movk x24, #0x0, lsl #16
-            0xf2c00018u, // movk x24, #0x0, lsl #32
-            0xf2e00018u, // movk x24, #0x0, lsl #48
-            0xf85f8319u, // ldur x25, [x24, #-8]
-            0x8b191319u, // add x25, x24, x25, lsl #4
-            GetMrsTpidrEl0(1), // mrs x1, tpidr_el0
-            0xeb19031fu, // cmp x24, x25
-            0x540000a0u, // b.eq 13c <ExceptionHandlerEntryCode+0x58>
-            0xf8410702u, // ldr x2, [x24], #16
-            0xeb02003fu, // cmp x1, x2
-            0x54000080u, // b.eq 144 <ExceptionHandlerEntryCode+0x60>
-            0x17fffffbu, // b 124 <ExceptionHandlerEntryCode+0x40>
-            0xd2800018u, // mov x24, #0x0
-            0x14000002u, // b 148 <ExceptionHandlerEntryCode+0x64>
-            0xf85f8318u, // ldur x24, [x24, #-8]
-            0xb4000438u, // cbz x24, 1cc <ExceptionHandlerEntryCode+0xe8>
-            0xf9419300u, // ldr x0, [x24, #800]
-            0x9100001fu, // mov sp, x0
-            0x7100027fu, // cmp w19, #0x0
-            0x54000180u, // b.eq 188 <ExceptionHandlerEntryCode+0xa4>
-            0x52800020u, // mov w0, #0x1
-            0xb9031f00u, // str w0, [x24, #796]
-            0xaa1303e0u, // mov x0, x19
-            0xaa1403e1u, // mov x1, x20
-            0xaa1503e2u, // mov x2, x21
-            0xd2800008u, // mov x8, #0x0
-            0xf2a00008u, // movk x8, #0x0, lsl #16
-            0xf2c00008u, // movk x8, #0x0, lsl #32
-            0xf2e00008u, // movk x8, #0x0, lsl #48
-            0xd63f0100u, // blr x8
-            0x1400000au, // b 1ac <ExceptionHandlerEntryCode+0xc8>
-            0xb9431f00u, // ldr w0, [x24, #796]
-            0x35000120u, // cbnz w0, 1b0 <ExceptionHandlerEntryCode+0xcc>
-            0x52800020u, // mov w0, #0x1
-            0xb9031f00u, // str w0, [x24, #796]
-            0xd2800000u, // mov x0, #0x0
-            0xf2a00000u, // movk x0, #0x0, lsl #16
-            0xf2c00000u, // movk x0, #0x0, lsl #32
-            0xf2e00000u, // movk x0, #0x0, lsl #48
-            0xd63f0000u, // blr x0
-            0xb9031f1fu, // str wzr, [x24, #796]
-            0x910002dfu, // mov sp, x22
-            0xaa1703feu, // mov x30, x23
-            0xa9415bf5u, // ldp x21, x22, [sp, #16]
-            0xa94263f7u, // ldp x23, x24, [sp, #32]
-            0xa9436bf9u, // ldp x25, x26, [sp, #48]
-            0xa8c453f3u, // ldp x19, x20, [sp], #64
-            0xd65f03c0u, // ret
-            0xaa1303e0u, // mov x0, x19
-            0xaa1403e1u, // mov x1, x20
-            0xaa1503e2u, // mov x2, x21
-            0x910002dfu, // mov sp, x22
-            0xa9415bf5u, // ldp x21, x22, [sp, #16]
-            0xa94263f7u, // ldp x23, x24, [sp, #32]
-            0xf9401bf9u, // ldr x25, [sp, #48]
-            0xa8c453f3u, // ldp x19, x20, [sp], #64
-            0xd2800003u, // mov x3, #0x0
-            0xf2a00003u, // movk x3, #0x0, lsl #16
-            0xf2c00003u, // movk x3, #0x0, lsl #32
-            0xf2e00003u, // movk x3, #0x0, lsl #48
-            0xd61f0060u, // br x3
-        };
-
-        public static uint[] SvcPatchCode = new uint[]
-        {
-            0xa9be53f3u, // stp x19, x20, [sp, #-32]!
-            0xf9000bf5u, // str x21, [sp, #16]
-            0xd2800013u, // mov x19, #0x0
-            0xf2a00013u, // movk x19, #0x0, lsl #16
-            0xf2c00013u, // movk x19, #0x0, lsl #32
-            0xf2e00013u, // movk x19, #0x0, lsl #48
-            GetMrsTpidrEl0(20), // mrs x20, tpidr_el0
-            0xf8410675u, // ldr x21, [x19], #16
-            0xeb15029fu, // cmp x20, x21
-            0x54000040u, // b.eq 22c <SvcPatchCode+0x2c>
-            0x17fffffdu, // b 21c <SvcPatchCode+0x1c>
-            0xf85f8273u, // ldur x19, [x19, #-8]
-            0xa9000660u, // stp x0, x1, [x19]
-            0xa9010e62u, // stp x2, x3, [x19, #16]
-            0xa9021664u, // stp x4, x5, [x19, #32]
-            0xa9031e66u, // stp x6, x7, [x19, #48]
-            0xa9042668u, // stp x8, x9, [x19, #64]
-            0xa9052e6au, // stp x10, x11, [x19, #80]
-            0xa906366cu, // stp x12, x13, [x19, #96]
-            0xa9073e6eu, // stp x14, x15, [x19, #112]
-            0xa9084670u, // stp x16, x17, [x19, #128]
-            0xf9400bf5u, // ldr x21, [sp, #16]
-            0xa8c253e0u, // ldp x0, x20, [sp], #32
-            0xa9090272u, // stp x18, x0, [x19, #144]
-            0xa90a5674u, // stp x20, x21, [x19, #160]
-            0xa90b5e76u, // stp x22, x23, [x19, #176]
-            0xa90c6678u, // stp x24, x25, [x19, #192]
-            0xa90d6e7au, // stp x26, x27, [x19, #208]
-            0xa90e767cu, // stp x28, x29, [x19, #224]
-            0x910003e0u, // mov x0, sp
-            0xa90f027eu, // stp x30, x0, [x19, #240]
-            0xad080660u, // stp q0, q1, [x19, #256]
-            0xad090e62u, // stp q2, q3, [x19, #288]
-            0xad0a1664u, // stp q4, q5, [x19, #320]
-            0xad0b1e66u, // stp q6, q7, [x19, #352]
-            0xad0c2668u, // stp q8, q9, [x19, #384]
-            0xad0d2e6au, // stp q10, q11, [x19, #416]
-            0xad0e366cu, // stp q12, q13, [x19, #448]
-            0xad0f3e6eu, // stp q14, q15, [x19, #480]
-            0xad104670u, // stp q16, q17, [x19, #512]
-            0xad114e72u, // stp q18, q19, [x19, #544]
-            0xad125674u, // stp q20, q21, [x19, #576]
-            0xad135e76u, // stp q22, q23, [x19, #608]
-            0xad146678u, // stp q24, q25, [x19, #640]
-            0xad156e7au, // stp q26, q27, [x19, #672]
-            0xad16767cu, // stp q28, q29, [x19, #704]
-            0xad177e7eu, // stp q30, q31, [x19, #736]
-            0xf9419260u, // ldr x0, [x19, #800]
-            0x9100001fu, // mov sp, x0
-            0x52800020u, // mov w0, #0x1
-            0xb9031e60u, // str w0, [x19, #796]
-            0x52800000u, // mov w0, #0x0
-            0xf941aa68u, // ldr x8, [x19, #848]
-            0xd63f0100u, // blr x8
-            0x35000280u, // cbnz w0, 328 <SvcPatchCode+0x128>
-            0x6d517ffeu, // ldp d30, d31, [sp, #272]
-            0x6d5077fcu, // ldp d28, d29, [sp, #256]
-            0x6d4f6ffau, // ldp d26, d27, [sp, #240]
-            0x6d4e67f8u, // ldp d24, d25, [sp, #224]
-            0x6d4d5ff6u, // ldp d22, d23, [sp, #208]
-            0x6d4c57f4u, // ldp d20, d21, [sp, #192]
-            0x6d4b4ff2u, // ldp d18, d19, [sp, #176]
-            0x6d4a47f0u, // ldp d16, d17, [sp, #160]
-            0x6d493feeu, // ldp d14, d15, [sp, #144]
-            0x6d4837ecu, // ldp d12, d13, [sp, #128]
-            0x6d472feau, // ldp d10, d11, [sp, #112]
-            0x6d4627e8u, // ldp d8, d9, [sp, #96]
-            0xa9457bfdu, // ldp x29, x30, [sp, #80]
-            0xa94473fbu, // ldp x27, x28, [sp, #64]
-            0xa9436bf9u, // ldp x25, x26, [sp, #48]
-            0xa94263f7u, // ldp x23, x24, [sp, #32]
-            0xa9415bf5u, // ldp x21, x22, [sp, #16]
-            0xa8d253f3u, // ldp x19, x20, [sp], #288
-            0xd65f03c0u, // ret
-            0xb9031e7fu, // str wzr, [x19, #796]
-            0xa94f027eu, // ldp x30, x0, [x19, #240]
-            0x9100001fu, // mov sp, x0
-            0xa9400660u, // ldp x0, x1, [x19]
-            0xa9410e62u, // ldp x2, x3, [x19, #16]
-            0xa9421664u, // ldp x4, x5, [x19, #32]
-            0xa9431e66u, // ldp x6, x7, [x19, #48]
-            0xa9442668u, // ldp x8, x9, [x19, #64]
-            0xa9452e6au, // ldp x10, x11, [x19, #80]
-            0xa946366cu, // ldp x12, x13, [x19, #96]
-            0xa9473e6eu, // ldp x14, x15, [x19, #112]
-            0xa9484670u, // ldp x16, x17, [x19, #128]
-            0xf9404a72u, // ldr x18, [x19, #144]
-            0xa94a5674u, // ldp x20, x21, [x19, #160]
-            0xa94b5e76u, // ldp x22, x23, [x19, #176]
-            0xa94c6678u, // ldp x24, x25, [x19, #192]
-            0xa94d6e7au, // ldp x26, x27, [x19, #208]
-            0xa94e767cu, // ldp x28, x29, [x19, #224]
-            0xad480660u, // ldp q0, q1, [x19, #256]
-            0xad490e62u, // ldp q2, q3, [x19, #288]
-            0xad4a1664u, // ldp q4, q5, [x19, #320]
-            0xad4b1e66u, // ldp q6, q7, [x19, #352]
-            0xad4c2668u, // ldp q8, q9, [x19, #384]
-            0xad4d2e6au, // ldp q10, q11, [x19, #416]
-            0xad4e366cu, // ldp q12, q13, [x19, #448]
-            0xad4f3e6eu, // ldp q14, q15, [x19, #480]
-            0xad504670u, // ldp q16, q17, [x19, #512]
-            0xad514e72u, // ldp q18, q19, [x19, #544]
-            0xad525674u, // ldp q20, q21, [x19, #576]
-            0xad535e76u, // ldp q22, q23, [x19, #608]
-            0xad546678u, // ldp q24, q25, [x19, #640]
-            0xad556e7au, // ldp q26, q27, [x19, #672]
-            0xad56767cu, // ldp q28, q29, [x19, #704]
-            0xad577e7eu, // ldp q30, q31, [x19, #736]
-            0xf9404e73u, // ldr x19, [x19, #152]
-            0x14000000u, // b 3b4 <SvcPatchCode+0x1b4>
-        };
-
-        public static uint[] MrsTpidrroEl0PatchCode = new uint[]
-        {
-            0xa9be4fffu, // stp xzr, x19, [sp, #-32]!
-            0xa90157f4u, // stp x20, x21, [sp, #16]
-            0xd2800013u, // mov x19, #0x0
-            0xf2a00013u, // movk x19, #0x0, lsl #16
-            0xf2c00013u, // movk x19, #0x0, lsl #32
-            0xf2e00013u, // movk x19, #0x0, lsl #48
-            GetMrsTpidrEl0(20), // mrs x20, tpidr_el0
-            0xf8410675u, // ldr x21, [x19], #16
-            0xeb15029fu, // cmp x20, x21
-            0x54000040u, // b.eq 3e4 <MrsTpidrroEl0PatchCode+0x2c>
-            0x17fffffdu, // b 3d4 <MrsTpidrroEl0PatchCode+0x1c>
-            0xf85f8273u, // ldur x19, [x19, #-8]
-            0xf9418673u, // ldr x19, [x19, #776]
-            0xf90003f3u, // str x19, [sp]
-            0xa94157f4u, // ldp x20, x21, [sp, #16]
-            0xf94007f3u, // ldr x19, [sp, #8]
-            0xf84207e0u, // ldr x0, [sp], #32
-            0x14000000u, // b 3fc <MrsTpidrroEl0PatchCode+0x44>
-        };
-
-        public static uint[] MrsTpidrEl0PatchCode = new uint[]
-        {
-            0xa9be4fffu, // stp xzr, x19, [sp, #-32]!
-            0xa90157f4u, // stp x20, x21, [sp, #16]
-            0xd2800013u, // mov x19, #0x0
-            0xf2a00013u, // movk x19, #0x0, lsl #16
-            0xf2c00013u, // movk x19, #0x0, lsl #32
-            0xf2e00013u, // movk x19, #0x0, lsl #48
-            GetMrsTpidrEl0(20), // mrs x20, tpidr_el0
-            0xf8410675u, // ldr x21, [x19], #16
-            0xeb15029fu, // cmp x20, x21
-            0x54000040u, // b.eq 42c <MrsTpidrEl0PatchCode+0x2c>
-            0x17fffffdu, // b 41c <MrsTpidrEl0PatchCode+0x1c>
-            0xf85f8273u, // ldur x19, [x19, #-8]
-            0xf9418273u, // ldr x19, [x19, #768]
-            0xf90003f3u, // str x19, [sp]
-            0xa94157f4u, // ldp x20, x21, [sp, #16]
-            0xf94007f3u, // ldr x19, [sp, #8]
-            0xf84207e0u, // ldr x0, [sp], #32
-            0x14000000u, // b 444 <MrsTpidrEl0PatchCode+0x44>
-        };
-
-        public static uint[] MrsCtrEl0PatchCode = new uint[]
-        {
-            0xa9be4fffu, // stp xzr, x19, [sp, #-32]!
-            0xa90157f4u, // stp x20, x21, [sp, #16]
-            0xd2800013u, // mov x19, #0x0
-            0xf2a00013u, // movk x19, #0x0, lsl #16
-            0xf2c00013u, // movk x19, #0x0, lsl #32
-            0xf2e00013u, // movk x19, #0x0, lsl #48
-            GetMrsTpidrEl0(20), // mrs x20, tpidr_el0
-            0xf8410675u, // ldr x21, [x19], #16
-            0xeb15029fu, // cmp x20, x21
-            0x54000040u, // b.eq 474 <MrsCtrEl0PatchCode+0x2c>
-            0x17fffffdu, // b 464 <MrsCtrEl0PatchCode+0x1c>
-            0xf85f8273u, // ldur x19, [x19, #-8]
-            0xf9419e73u, // ldr x19, [x19, #824]
-            0xf90003f3u, // str x19, [sp]
-            0xa94157f4u, // ldp x20, x21, [sp, #16]
-            0xf94007f3u, // ldr x19, [sp, #8]
-            0xf84207e0u, // ldr x0, [sp], #32
-            0x14000000u, // b 48c <MrsCtrEl0PatchCode+0x44>
-        };
-
-        public static uint[] MsrTpidrEl0PatchCode = new uint[]
-        {
-            0xa9be03f3u, // stp x19, x0, [sp, #-32]!
-            0xa90157f4u, // stp x20, x21, [sp, #16]
-            0xd2800013u, // mov x19, #0x0
-            0xf2a00013u, // movk x19, #0x0, lsl #16
-            0xf2c00013u, // movk x19, #0x0, lsl #32
-            0xf2e00013u, // movk x19, #0x0, lsl #48
-            GetMrsTpidrEl0(20), // mrs x20, tpidr_el0
-            0xf8410675u, // ldr x21, [x19], #16
-            0xeb15029fu, // cmp x20, x21
-            0x54000040u, // b.eq 4bc <MsrTpidrEl0PatchCode+0x2c>
-            0x17fffffdu, // b 4ac <MsrTpidrEl0PatchCode+0x1c>
-            0xf85f8273u, // ldur x19, [x19, #-8]
-            0xf94007f4u, // ldr x20, [sp, #8]
-            0xf9018274u, // str x20, [x19, #768]
-            0xa94157f4u, // ldp x20, x21, [sp, #16]
-            0xf84207f3u, // ldr x19, [sp], #32
-            0x14000000u, // b 4d0 <MsrTpidrEl0PatchCode+0x40>
-        };
-
-        public static uint[] MrsCntpctEl0PatchCode = new uint[]
-        {
-            0xa9b407e0u, // stp x0, x1, [sp, #-192]!
-            0xa9010fe2u, // stp x2, x3, [sp, #16]
-            0xa90217e4u, // stp x4, x5, [sp, #32]
-            0xa9031fe6u, // stp x6, x7, [sp, #48]
-            0xa90427e8u, // stp x8, x9, [sp, #64]
-            0xa9052feau, // stp x10, x11, [sp, #80]
-            0xa90637ecu, // stp x12, x13, [sp, #96]
-            0xa9073feeu, // stp x14, x15, [sp, #112]
-            0xa90847f0u, // stp x16, x17, [sp, #128]
-            0xa9094ff2u, // stp x18, x19, [sp, #144]
-            0xa90a57f4u, // stp x20, x21, [sp, #160]
-            0xf9005ffeu, // str x30, [sp, #184]
-            0xd2800013u, // mov x19, #0x0
-            0xf2a00013u, // movk x19, #0x0, lsl #16
-            0xf2c00013u, // movk x19, #0x0, lsl #32
-            0xf2e00013u, // movk x19, #0x0, lsl #48
-            GetMrsTpidrEl0(20), // mrs x20, tpidr_el0
-            0xf8410675u, // ldr x21, [x19], #16
-            0xeb15029fu, // cmp x20, x21
-            0x54000040u, // b.eq 528 <MrsCntpctEl0PatchCode+0x54>
-            0x17fffffdu, // b 518 <MrsCntpctEl0PatchCode+0x44>
-            0xf85f8273u, // ldur x19, [x19, #-8]
-            0x52800020u, // mov w0, #0x1
-            0xb9031e60u, // str w0, [x19, #796]
-            0xd2800000u, // mov x0, #0x0
-            0xf2a00000u, // movk x0, #0x0, lsl #16
-            0xf2c00000u, // movk x0, #0x0, lsl #32
-            0xf2e00000u, // movk x0, #0x0, lsl #48
-            0xd63f0000u, // blr x0
-            0xb9031e7fu, // str wzr, [x19, #796]
-            0xf9005be0u, // str x0, [sp, #176]
-            0xf9405ffeu, // ldr x30, [sp, #184]
-            0xa94a57f4u, // ldp x20, x21, [sp, #160]
-            0xa9494ff2u, // ldp x18, x19, [sp, #144]
-            0xa94847f0u, // ldp x16, x17, [sp, #128]
-            0xa9473feeu, // ldp x14, x15, [sp, #112]
-            0xa94637ecu, // ldp x12, x13, [sp, #96]
-            0xa9452feau, // ldp x10, x11, [sp, #80]
-            0xa94427e8u, // ldp x8, x9, [sp, #64]
-            0xa9431fe6u, // ldp x6, x7, [sp, #48]
-            0xa94217e4u, // ldp x4, x5, [sp, #32]
-            0xa9410fe2u, // ldp x2, x3, [sp, #16]
-            0xa8cb07e0u, // ldp x0, x1, [sp], #176
-            0xf84107e0u, // ldr x0, [sp], #16
-            0x14000000u, // b 584 <MrsCntpctEl0PatchCode+0xb0>
-        };
-
-        private static uint GetMrsTpidrEl0(uint rd)
-        {
-            if (OperatingSystem.IsMacOS())
-            {
-                return 0xd53bd060u | rd; // TPIDRRO
-            }
-            else
-            {
-                return 0xd53bd040u | rd; // TPIDR
-            }
-        }
-    }
-}
\ No newline at end of file
diff --git a/src/Ryujinx.Cpu/Nce/NceCpuContext.cs b/src/Ryujinx.Cpu/Nce/NceCpuContext.cs
index 04a97a696..ca2fcc8bb 100644
--- a/src/Ryujinx.Cpu/Nce/NceCpuContext.cs
+++ b/src/Ryujinx.Cpu/Nce/NceCpuContext.cs
@@ -1,15 +1,63 @@
-using ARMeilleure.Memory;
-using ARMeilleure.Signal;
+using ARMeilleure.Signal;
 using Ryujinx.Cpu.Jit;
 using Ryujinx.Common;
 using Ryujinx.Memory;
 using System;
+using System.Collections.Generic;
 using System.Runtime.InteropServices;
 
 namespace Ryujinx.Cpu.Nce
 {
     class NceCpuContext : ICpuContext
     {
+        private static uint[] _getTpidrEl0Code = new uint[]
+        {
+            GetMrsTpidrEl0(0), // mrs x0, tpidr_el0
+            0xd65f03c0u, // ret
+        };
+
+        private static uint GetMrsTpidrEl0(uint rd) // Returns the MRS instruction word that reads TPIDR_EL0 (TPIDRRO_EL0 on macOS) into x<rd>.
+        {
+            if (OperatingSystem.IsMacOS()) // macOS reads TPIDRRO_EL0 instead of TPIDR_EL0.
+            {
+                return 0xd53bd060u | rd; // mrs x<rd>, tpidrro_el0 (rd is ORed into the low bits)
+            }
+            else
+            {
+                return 0xd53bd040u | rd; // mrs x<rd>, tpidr_el0 (rd is ORed into the low bits)
+            }
+        }
+
+        readonly struct CodeWriter // Accumulates code words in a list and copies them into a single memory block on request.
+        {
+            private readonly List<uint> _fullCode; // Every word written so far, in write order.
+
+            public CodeWriter()
+            {
+                _fullCode = new List<uint>();
+            }
+
+            public ulong Write(uint[] code) // Appends code and returns the byte offset at which it starts.
+            {
+                ulong offset = (ulong)_fullCode.Count * sizeof(uint); // Offset is taken before appending, so it points at the first new word.
+                _fullCode.AddRange(code);
+
+                return offset;
+            }
+
+            public MemoryBlock CreateMemoryBlock() // Copies the words written so far into a new block; later writes are not reflected in it.
+            {
+                ReadOnlySpan<byte> codeBytes = MemoryMarshal.Cast<uint, byte>(_fullCode.ToArray());
+
+                MemoryBlock codeBlock = new(BitUtils.AlignUp((ulong)codeBytes.Length, 0x1000UL)); // Block size is rounded up to a multiple of 0x1000 bytes.
+
+                codeBlock.Write(0, codeBytes);
+                codeBlock.Reprotect(0, (ulong)codeBytes.Length, MemoryPermission.ReadAndExecute, true); // Only the bytes actually written are reprotected.
+
+                return codeBlock;
+            }
+        }
+
         private delegate void ThreadStart(IntPtr nativeContextPtr);
         private delegate IntPtr GetTpidrEl0();
         private static MemoryBlock _codeBlock;
@@ -21,31 +69,31 @@ namespace Ryujinx.Cpu.Nce
 
         static NceCpuContext()
         {
-            ulong threadStartCodeSize = (ulong)NceAsmTable.ThreadStartCode.Length * 4;
-            ulong enEntryCodeOffset = threadStartCodeSize;
-            ulong ehEntryCodeSize = (ulong)NceAsmTable.ExceptionHandlerEntryCode.Length * 4;
-            ulong getTpidrEl0CodeOffset = threadStartCodeSize + ehEntryCodeSize;
-            ulong getTpidrEl0CodeSize = (ulong)NceAsmTable.GetTpidrEl0Code.Length * 4;
+            CodeWriter codeWriter = new();
 
-            ulong size = BitUtils.AlignUp(threadStartCodeSize + ehEntryCodeSize + getTpidrEl0CodeSize, 0x1000UL);
+            uint[] threadStartCode = NcePatcher.GenerateThreadStartCode();
+            uint[] ehSuspendCode = NcePatcher.GenerateSuspendExceptionHandler();
 
-            MemoryBlock codeBlock = new MemoryBlock(size);
+            ulong threadStartCodeOffset = codeWriter.Write(threadStartCode);
+            ulong getTpidrEl0CodeOffset = codeWriter.Write(_getTpidrEl0Code);
+            ulong ehSuspendCodeOffset = codeWriter.Write(ehSuspendCode);
 
-            codeBlock.Write(0, MemoryMarshal.Cast<uint, byte>(NceAsmTable.ThreadStartCode.AsSpan()));
-            codeBlock.Write(getTpidrEl0CodeOffset, MemoryMarshal.Cast<uint, byte>(NceAsmTable.GetTpidrEl0Code.AsSpan()));
+            MemoryBlock codeBlock = null;
 
             NativeSignalHandler.Initialize(new JitMemoryAllocator());
 
             NativeSignalHandler.InitializeSignalHandler(MemoryBlock.GetPageSize(), (IntPtr oldSignalHandlerSegfaultPtr, IntPtr signalHandlerPtr) =>
             {
-                uint[] ehEntryCode = NcePatcher.GenerateExceptionHandlerEntry(oldSignalHandlerSegfaultPtr, signalHandlerPtr);
-                codeBlock.Write(enEntryCodeOffset, MemoryMarshal.Cast<uint, byte>(ehEntryCode.AsSpan()));
-                codeBlock.Reprotect(0, size, MemoryPermission.ReadAndExecute, true);
-                return codeBlock.GetPointer(enEntryCodeOffset, ehEntryCodeSize);
-            }, NceThreadPal.UnixSuspendSignal);
+                uint[] ehWrapperCode = NcePatcher.GenerateWrapperExceptionHandler(oldSignalHandlerSegfaultPtr, signalHandlerPtr);
+                ulong ehWrapperCodeOffset = codeWriter.Write(ehWrapperCode);
+                codeBlock = codeWriter.CreateMemoryBlock();
+                return codeBlock.GetPointer(ehWrapperCodeOffset, (ulong)ehWrapperCode.Length * sizeof(uint));
+            });
 
-            _threadStart = Marshal.GetDelegateForFunctionPointer<ThreadStart>(codeBlock.GetPointer(0, threadStartCodeSize));
-            _getTpidrEl0 = Marshal.GetDelegateForFunctionPointer<GetTpidrEl0>(codeBlock.GetPointer(getTpidrEl0CodeOffset, getTpidrEl0CodeSize));
+            NativeSignalHandler.InstallUnixSignalHandler(NceThreadPal.UnixSuspendSignal, codeBlock.GetPointer(ehSuspendCodeOffset, (ulong)ehSuspendCode.Length * sizeof(uint)));
+
+            _threadStart = Marshal.GetDelegateForFunctionPointer<ThreadStart>(codeBlock.GetPointer(threadStartCodeOffset, (ulong)threadStartCode.Length * sizeof(uint)));
+            _getTpidrEl0 = Marshal.GetDelegateForFunctionPointer<GetTpidrEl0>(codeBlock.GetPointer(getTpidrEl0CodeOffset, (ulong)_getTpidrEl0Code.Length * sizeof(uint)));
             _codeBlock = codeBlock;
         }
 
@@ -69,7 +117,9 @@ namespace Ryujinx.Cpu.Nce
             int tableIndex = NceThreadTable.Register(_getTpidrEl0(), nec.NativeContextPtr);
 
             nec.SetStartAddress(address);
+            nec.RegisterAlternateStack();
             _threadStart(nec.NativeContextPtr);
+            nec.UnregisterAlternateStack();
 
             NceThreadTable.Unregister(tableIndex);
         }
diff --git a/src/Ryujinx.Cpu/Nce/NceExecutionContext.cs b/src/Ryujinx.Cpu/Nce/NceExecutionContext.cs
index 8746cac33..402e44a2d 100644
--- a/src/Ryujinx.Cpu/Nce/NceExecutionContext.cs
+++ b/src/Ryujinx.Cpu/Nce/NceExecutionContext.cs
@@ -1,11 +1,16 @@
+using ARMeilleure.Signal;
 using ARMeilleure.State;
+using Ryujinx.Memory;
 using System;
 using System.Runtime.InteropServices;
+using System.Threading;
 
 namespace Ryujinx.Cpu.Nce
 {
     class NceExecutionContext : IExecutionContext
     {
+        private const ulong AlternateStackSize = 0x4000;
+
         private readonly NceNativeContext _context;
         private readonly ExceptionCallbacks _exceptionCallbacks;
 
@@ -60,6 +65,8 @@ namespace Ryujinx.Cpu.Nce
         private delegate bool SupervisorCallHandler(int imm);
         private SupervisorCallHandler _svcHandler;
 
+        private MemoryBlock _alternateStackMemory;
+
         public NceExecutionContext(ExceptionCallbacks exceptionCallbacks)
         {
             _svcHandler = OnSupervisorCall;
@@ -97,6 +104,22 @@ namespace Ryujinx.Cpu.Nce
             storage.HostThreadHandle = NceThreadPal.GetCurrentThreadHandle();
         }
 
+        public void RegisterAlternateStack()
+        {
+            // We need to use an alternate stack to handle the suspend signal,
+            // as the guest stack may be in a state that is not suitable for the signal handlers.
+
+            _alternateStackMemory = new MemoryBlock(AlternateStackSize);
+            NativeSignalHandler.InstallUnixAlternateStackForCurrentThread(_alternateStackMemory.GetPointer(0UL, AlternateStackSize), AlternateStackSize);
+        }
+
+        public void UnregisterAlternateStack()
+        {
+            NativeSignalHandler.UninstallUnixAlternateStackForCurrentThread();
+            _alternateStackMemory.Dispose();
+            _alternateStackMemory = null;
+        }
+
         public bool OnSupervisorCall(int imm)
         {
             _exceptionCallbacks.SupervisorCallback?.Invoke(this, 0UL, imm);
@@ -114,7 +137,18 @@ namespace Ryujinx.Cpu.Nce
             IntPtr threadHandle = _context.GetStorage().HostThreadHandle;
             if (threadHandle != IntPtr.Zero)
             {
-                NceThreadPal.SuspendThread(threadHandle);
+                // Bit 0 set means that the thread is currently running managed code.
+                // Bit 1 set means that an interrupt was requested for the thread.
+                // Thus, we only need to send the suspend signal if the value was 0 (not running managed code,
+                // and no interrupt was requested before).
+
+                ref uint inManaged = ref _context.GetStorage().InManaged;
+                uint oldValue = Interlocked.Or(ref inManaged, 2);
+
+                if (oldValue == 0)
+                {
+                    NceThreadPal.SuspendThread(threadHandle);
+                }
             }
         }
 
diff --git a/src/Ryujinx.Cpu/Nce/NceNativeContext.cs b/src/Ryujinx.Cpu/Nce/NceNativeContext.cs
index 4db41f347..a7d5f4eec 100644
--- a/src/Ryujinx.Cpu/Nce/NceNativeContext.cs
+++ b/src/Ryujinx.Cpu/Nce/NceNativeContext.cs
@@ -12,21 +12,21 @@ namespace Ryujinx.Cpu.Nce
         {
             public Array32<ulong> X;
             public Array32<V128> V;
-            public ulong TpidrEl0; // 0x300
-            public ulong TpidrroEl0; // 0x308
-            public uint Pstate; // 0x310
-            public uint Fpcr; // 0x314
-            public uint Fpsr; // 0x318
-            public uint InManaged; // 0x31C
-            public ulong HostSp; // 0x320
-            public IntPtr HostThreadHandle; // 0x328
-            public ulong HostX30; // 0x330
-            public ulong CtrEl0; // 0x338
-            public ulong Reserved340; // 0x340
-            public ulong Reserved348; // 0x348
-            public IntPtr SvcCallHandler; // 0x350
+            public ulong TpidrEl0;
+            public ulong TpidrroEl0;
+            public ulong CtrEl0;
+            public uint Pstate;
+            public uint Fpcr;
+            public uint Fpsr;
+            public uint InManaged;
+            public ulong HostSp;
+            public IntPtr HostThreadHandle;
+            public ulong TempStorage;
+            public IntPtr SvcCallHandler;
         }
 
+        private static NativeCtxStorage _dummyStorage = new();
+
         private readonly MemoryBlock _block;
 
         public IntPtr BasePtr => _block.Pointer;
@@ -36,6 +36,61 @@ namespace Ryujinx.Cpu.Nce
             _block = new MemoryBlock((ulong)Unsafe.SizeOf<NativeCtxStorage>());
         }
 
+        public static int GetXOffset(int index)
+        {
+            return StorageOffset(ref _dummyStorage, ref _dummyStorage.X[index]);
+        }
+
+        public static int GetGuestSPOffset() // Returns the byte offset of the X[31] slot within NativeCtxStorage.
+        {
+            return StorageOffset(ref _dummyStorage, ref _dummyStorage.X[31]); // Same slot that GetXOffset(31) resolves to.
+        }
+
+        public static int GetVOffset(int index)
+        {
+            return StorageOffset(ref _dummyStorage, ref _dummyStorage.V[index]);
+        }
+
+        public static int GetTpidrEl0Offset()
+        {
+            return StorageOffset(ref _dummyStorage, ref _dummyStorage.TpidrEl0);
+        }
+
+        public static int GetTpidrroEl0Offset()
+        {
+            return StorageOffset(ref _dummyStorage, ref _dummyStorage.TpidrroEl0);
+        }
+
+        public static int GetInManagedOffset()
+        {
+            return StorageOffset(ref _dummyStorage, ref _dummyStorage.InManaged);
+        }
+
+        public static int GetHostSPOffset()
+        {
+            return StorageOffset(ref _dummyStorage, ref _dummyStorage.HostSp);
+        }
+
+        public static int GetCtrEl0Offset()
+        {
+            return StorageOffset(ref _dummyStorage, ref _dummyStorage.CtrEl0);
+        }
+
+        public static int GetTempStorageOffset()
+        {
+            return StorageOffset(ref _dummyStorage, ref _dummyStorage.TempStorage);
+        }
+
+        public static int GetSvcCallHandlerOffset()
+        {
+            return StorageOffset(ref _dummyStorage, ref _dummyStorage.SvcCallHandler);
+        }
+
+        private static int StorageOffset<T>(ref NativeCtxStorage storage, ref T target) // Byte distance from the start of storage to target; target is expected to refer to a field inside storage.
+        {
+            return (int)Unsafe.ByteOffset(ref Unsafe.As<NativeCtxStorage, T>(ref storage), ref target); // storage is reinterpreted as T only so both refs have the same type.
+        }
+
         public unsafe ref NativeCtxStorage GetStorage() => ref Unsafe.AsRef<NativeCtxStorage>((void*)_block.Pointer);
 
         public void Dispose() => _block.Dispose();
diff --git a/src/Ryujinx.Cpu/Nce/NcePatcher.cs b/src/Ryujinx.Cpu/Nce/NcePatcher.cs
index ee2c57578..4c8973063 100644
--- a/src/Ryujinx.Cpu/Nce/NcePatcher.cs
+++ b/src/Ryujinx.Cpu/Nce/NcePatcher.cs
@@ -1,13 +1,18 @@
+using Ryujinx.Cpu.Nce.Arm64;
 using Ryujinx.Common;
 using Ryujinx.Common.Logging;
 using Ryujinx.Memory;
 using System;
-using System.Runtime.InteropServices;
 
 namespace Ryujinx.Cpu.Nce
 {
     static class NcePatcher
     {
+        private const int ScratchBaseReg = 19;
+
+        private const uint IntCalleeSavedRegsMask = 0x1ff80000; // X19 to X28
+        private const uint FpCalleeSavedRegsMask = 0xff00; // D8 to D15
+
         private struct Context
         {
             public readonly ICpuMemoryManager MemoryManager;
@@ -76,18 +81,18 @@ namespace Ryujinx.Cpu.Nce
                     PatchInstruction(memoryManager, address, WriteMrsCtrEl0Patch(ref context, address, rd));
                     Logger.Debug?.Print(LogClass.Cpu, $"Patched MRS x{rd}, ctr_el0 at 0x{address:X}.");
                 }
-                else if ((inst & ~0x1f) == 0xd51bd040) // msr tpidr_el0, x0
-                {
-                    uint rd = inst & 0x1f;
-                    PatchInstruction(memoryManager, address, WriteMsrTpidrEl0Patch(ref context, address, rd));
-                    Logger.Debug?.Print(LogClass.Cpu, $"Patched MSR tpidr_el0, x{rd} at 0x{address:X}.");
-                }
                 else if ((inst & ~0x1f) == 0xd53be020) // mrs x0, cntpct_el0
                 {
                     uint rd = inst & 0x1f;
                     PatchInstruction(memoryManager, address, WriteMrsCntpctEl0Patch(ref context, address, rd));
                     Logger.Debug?.Print(LogClass.Cpu, $"Patched MRS x{rd}, cntpct_el0 at 0x{address:X}.");
                 }
+                else if ((inst & ~0x1f) == 0xd51bd040) // msr tpidr_el0, x0
+                {
+                    uint rd = inst & 0x1f;
+                    PatchInstruction(memoryManager, address, WriteMsrTpidrEl0Patch(ref context, address, rd));
+                    Logger.Debug?.Print(LogClass.Cpu, $"Patched MSR tpidr_el0, x{rd} at 0x{address:X}.");
+                }
 
                 address += 4;
             }
@@ -106,159 +111,242 @@ namespace Ryujinx.Cpu.Nce
 
         private static ulong WriteSvcPatch(ref Context context, ulong svcAddress, uint svcId)
         {
-            uint[] code = GetCopy(NceAsmTable.SvcPatchCode);
+            Assembler asm = new();
 
-            int movIndex = Array.IndexOf(code, 0xd2800013u);
+            WriteManagedCall(asm, (asm, ctx, tmp, tmp2) =>
+            {
+                for (int i = 0; i < 8; i++)
+                {
+                    asm.StrRiUn(Gpr(i), ctx, NceNativeContext.GetXOffset(i));
+                }
 
-            WritePointer(code, movIndex, (ulong)NceThreadTable.EntriesPointer);
+                WriteInManagedLockAcquire(asm, ctx, tmp, tmp2);
 
-            int mov2Index = Array.IndexOf(code, 0x52800000u, movIndex + 1);
+                asm.Mov(Gpr(0, OperandType.I32), svcId);
+                asm.LdrRiUn(tmp, ctx, NceNativeContext.GetSvcCallHandlerOffset());
+                asm.Blr(tmp);
 
-            ulong targetAddress = context.GetPatchWriteAddress(code.Length);
+                Operand lblContinue = asm.CreateLabel();
+                Operand lblQuit = asm.CreateLabel();
 
-            code[mov2Index] |= svcId << 5;
-            code[code.Length - 1] |= GetImm26(targetAddress + (ulong)(code.Length - 1) * 4, svcAddress + 4);
+                asm.Cbnz(Gpr(0, OperandType.I32), lblContinue);
 
-            WriteCode(context.MemoryManager, targetAddress, code);
+                asm.MarkLabel(lblQuit);
+
+                CreateRegisterSaveRestoreForManaged().WriteEpilogue(asm);
+
+                asm.Ret(Gpr(30));
+
+                asm.MarkLabel(lblContinue);
+
+                WriteInManagedLockRelease(asm, ctx, tmp, tmp2, ThreadExitMethod.Label, lblQuit);
+
+                for (int i = 0; i < 8; i++)
+                {
+                    asm.LdrRiUn(Gpr(i), ctx, NceNativeContext.GetXOffset(i));
+                }
+            }, 0xff);
+
+            ulong targetAddress = context.GetPatchWriteAddress(asm.CodeWords + 1); // Size is in words: emitted code plus one for the trailing branch.
+
+            asm.B(GetOffset(targetAddress + (ulong)asm.CodeWords * sizeof(uint), svcAddress + sizeof(uint)));
+
+            WriteCode(context.MemoryManager, targetAddress, asm.GetCode());
 
             return targetAddress;
         }
 
         private static ulong WriteMrsTpidrroEl0Patch(ref Context context, ulong mrsAddress, uint rd)
         {
-            uint[] code = GetCopy(NceAsmTable.MrsTpidrroEl0PatchCode);
-
-            int movIndex = Array.IndexOf(code, 0xd2800013u);
-
-            WritePointer(code, movIndex, (ulong)NceThreadTable.EntriesPointer);
-
-            ulong targetAddress = context.GetPatchWriteAddress(code.Length);
-
-            code[code.Length - 2] |= rd;
-            code[code.Length - 1] |= GetImm26(targetAddress + (ulong)(code.Length - 1) * 4, mrsAddress + 4);
-
-            WriteCode(context.MemoryManager, targetAddress, code);
-
-            return targetAddress;
+            return WriteMrsContextRead(ref context, mrsAddress, rd, NceNativeContext.GetTpidrroEl0Offset());
         }
 
         private static ulong WriteMrsTpidrEl0Patch(ref Context context, ulong mrsAddress, uint rd)
         {
-            uint[] code = GetCopy(NceAsmTable.MrsTpidrEl0PatchCode);
-
-            int movIndex = Array.IndexOf(code, 0xd2800013u);
-
-            WritePointer(code, movIndex, (ulong)NceThreadTable.EntriesPointer);
-
-            ulong targetAddress = context.GetPatchWriteAddress(code.Length);
-
-            code[code.Length - 2] |= rd;
-            code[code.Length - 1] |= GetImm26(targetAddress + (ulong)(code.Length - 1) * 4, mrsAddress + 4);
-
-            WriteCode(context.MemoryManager, targetAddress, code);
-
-            return targetAddress;
+            return WriteMrsContextRead(ref context, mrsAddress, rd, NceNativeContext.GetTpidrEl0Offset());
         }
 
         private static ulong WriteMrsCtrEl0Patch(ref Context context, ulong mrsAddress, uint rd)
         {
-            uint[] code = GetCopy(NceAsmTable.MrsCtrEl0PatchCode);
+            return WriteMrsContextRead(ref context, mrsAddress, rd, NceNativeContext.GetCtrEl0Offset());
+        }
 
-            int movIndex = Array.IndexOf(code, 0xd2800013u);
+        private static ulong WriteMrsCntpctEl0Patch(ref Context context, ulong mrsAddress, uint rd)
+        {
+            Assembler asm = new();
 
-            WritePointer(code, movIndex, (ulong)NceThreadTable.EntriesPointer);
+            WriteManagedCall(asm, (asm, ctx, tmp, tmp2) =>
+            {
+                WriteInManagedLockAcquire(asm, ctx, tmp, tmp2);
 
-            ulong targetAddress = context.GetPatchWriteAddress(code.Length);
+                asm.Mov(tmp, (ulong)NceNativeInterface.GetTickCounterAccessFunctionPointer());
+                asm.Blr(tmp);
+                asm.StrRiUn(Gpr(0), ctx, NceNativeContext.GetTempStorageOffset());
 
-            code[code.Length - 2] |= rd;
-            code[code.Length - 1] |= GetImm26(targetAddress + (ulong)(code.Length - 1) * 4, mrsAddress + 4);
+                WriteInManagedLockRelease(asm, ctx, tmp, tmp2, ThreadExitMethod.GenerateReturn);
 
-            WriteCode(context.MemoryManager, targetAddress, code);
+                asm.LdrRiUn(Gpr((int)rd), ctx, NceNativeContext.GetTempStorageOffset());
+            }, 1u << (int)rd);
+
+            ulong targetAddress = context.GetPatchWriteAddress(asm.CodeWords + 1); // Size is in words: emitted code plus one for the trailing branch.
+
+            asm.B(GetOffset(targetAddress + (ulong)asm.CodeWords * sizeof(uint), mrsAddress + sizeof(uint)));
+
+            WriteCode(context.MemoryManager, targetAddress, asm.GetCode());
 
             return targetAddress;
         }
 
         private static ulong WriteMsrTpidrEl0Patch(ref Context context, ulong msrAddress, uint rd)
         {
-            uint r2 = rd == 0 ? 1u : 0u;
+            Assembler asm = new();
 
-            uint[] code = GetCopy(NceAsmTable.MsrTpidrEl0PatchCode);
+            Span<int> scratchRegs = stackalloc int[3];
+            PickScratchRegs(scratchRegs, 1u << (int)rd);
 
-            code[0] |= rd << 10;
+            RegisterSaveRestore rsr = new((1 << scratchRegs[0]) | (1 << scratchRegs[1]) | (1 << scratchRegs[2]));
 
-            int movIndex = Array.IndexOf(code, 0xd2800013u);
+            rsr.WritePrologue(asm);
 
-            WritePointer(code, movIndex, (ulong)NceThreadTable.EntriesPointer);
+            WriteLoadContext(asm, Gpr(scratchRegs[0]), Gpr(scratchRegs[1]), Gpr(scratchRegs[2]));
+            asm.StrRiUn(Gpr((int)rd), Gpr(scratchRegs[0]),NceNativeContext.GetTpidrEl0Offset());
 
-            ulong targetAddress = context.GetPatchWriteAddress(code.Length);
+            rsr.WriteEpilogue(asm);
 
-            code[code.Length - 1] |= GetImm26(targetAddress + (ulong)(code.Length - 1) * 4, msrAddress + 4);
+            ulong targetAddress = context.GetPatchWriteAddress(asm.CodeWords + 1); // Size is in words: emitted code plus one for the trailing branch.
 
-            WriteCode(context.MemoryManager, targetAddress, code);
+            asm.B(GetOffset(targetAddress + (ulong)asm.CodeWords * sizeof(uint), msrAddress + sizeof(uint)));
+
+            WriteCode(context.MemoryManager, targetAddress, asm.GetCode());
 
             return targetAddress;
         }
 
-        private static ulong WriteMrsCntpctEl0Patch(ref Context context, ulong mrsAddress, uint rd)
+        // Emits a patch that stands in for an MRS located at mrsAddress: rd receives the 64-bit value stored at
+        // (native context pointer + contextOffset), then execution branches back to the instruction after the MRS.
+        // Returns the address the patch code was written to.
+        private static ulong WriteMrsContextRead(ref Context context, ulong mrsAddress, uint rd, int contextOffset)
         {
-            uint[] code = GetCopy(NceAsmTable.MrsCntpctEl0PatchCode);
+            Assembler asm = new();
 
-            int movIndex = Array.IndexOf(code, 0xd2800013u);
+            // rd is blacklisted from the scratch set because it carries the result out of the patch.
+            Span<int> scratchRegs = stackalloc int[3];
+            PickScratchRegs(scratchRegs, 1u << (int)rd);
 
-            WritePointer(code, movIndex, (ulong)NceThreadTable.EntriesPointer);
+            RegisterSaveRestore rsr = new((1 << scratchRegs[0]) | (1 << scratchRegs[1]) | (1 << scratchRegs[2]));
 
-            int mov2Index = Array.IndexOf(code, 0xD2800000u, movIndex + 1);
+            rsr.WritePrologue(asm);
 
-            WriteTickCounterAccessFunctionPointer(code, mov2Index);
+            WriteLoadContext(asm, Gpr(scratchRegs[0]), Gpr(scratchRegs[1]), Gpr(scratchRegs[2]));
+            // Only the field address is formed here; the actual load is emitted after the epilogue.
+            asm.Add(Gpr((int)rd), Gpr(scratchRegs[0]), Const((ulong)contextOffset));
 
-            ulong targetAddress = context.GetPatchWriteAddress(code.Length);
+            rsr.WriteEpilogue(asm);
 
-            code[code.Length - 2] |= rd;
-            code[code.Length - 1] |= GetImm26(targetAddress + (ulong)(code.Length - 1) * 4, mrsAddress + 4);
+            asm.LdrRiUn(Gpr((int)rd), Gpr((int)rd), 0);
 
-            WriteCode(context.MemoryManager, targetAddress, code);
+            // NOTE(review): CodeWords is a count of 32-bit words while sizeof(uint) is a byte size. Only one more
+            // word (the branch below) is emitted, so this looks like it reserves 3 words more than needed -- confirm
+            // against GetPatchWriteAddress whether "+ 1" was intended.
+            ulong targetAddress = context.GetPatchWriteAddress(asm.CodeWords + sizeof(uint));
+
+            // The branch source is the address of the branch itself (patch start + words emitted so far);
+            // the destination is the instruction following the patched MRS.
+            asm.B(GetOffset(targetAddress + (ulong)asm.CodeWords * sizeof(uint), mrsAddress + sizeof(uint)));
+
+            WriteCode(context.MemoryManager, targetAddress, asm.GetCode());
 
             return targetAddress;
         }
 
-        public static uint[] GenerateExceptionHandlerEntry(IntPtr oldSignalHandlerSegfaultPtr, IntPtr signalHandlerPtr)
+        // Emits code that looks up the current thread in the table at NceThreadTable.EntriesPointer and leaves the
+        // second 8-byte slot of the matching 16-byte entry in tmp0. tmp1 and tmp2 are clobbered.
+        // The emitted scan has no end-of-table check: it only terminates when an entry whose first slot equals the
+        // thread register value is found, so callers must only use it on threads known to be in the table.
+        private static void WriteLoadContext(Assembler asm, Operand tmp0, Operand tmp1, Operand tmp2)
         {
-            uint[] code = GetCopy(NceAsmTable.ExceptionHandlerEntryCode);
+            asm.Mov(tmp0, (ulong)NceThreadTable.EntriesPointer);
 
-            int movIndex = Array.IndexOf(code, 0xd2800018u);
+            // The thread identifier is read from TPIDRRO_EL0 on macOS and from TPIDR_EL0 elsewhere.
+            if (OperatingSystem.IsMacOS())
+            {
+                asm.MrsTpidrroEl0(tmp1);
+            }
+            else
+            {
+                asm.MrsTpidrEl0(tmp1);
+            }
 
-            WritePointer(code, movIndex, (ulong)NceThreadTable.EntriesPointer);
+            Operand lblFound = asm.CreateLabel();
+            Operand lblLoop = asm.CreateLabel();
 
-            int mov2Index = Array.IndexOf(code, 0xd2800008u, movIndex + 1);
+            asm.MarkLabel(lblLoop);
 
-            WritePointer(code, mov2Index, (ulong)signalHandlerPtr);
+            // Load the first slot of the entry and post-increment tmp0 by the 16-byte entry stride.
+            asm.LdrRiPost(tmp2, tmp0, 16);
+            asm.Cmp(tmp1, tmp2);
+            asm.B(lblFound, ArmCondition.Eq);
+            asm.B(lblLoop);
 
-            int mov3Index = Array.IndexOf(code, 0xd2800000u, mov2Index + 1);
+            asm.MarkLabel(lblFound);
 
-            WritePointer(code, mov3Index, (ulong)NceNativeInterface.GetSuspendThreadHandlerFunctionPointer());
-
-            int mov4Index = Array.IndexOf(code, 0xd2800003u, mov3Index + 1);
-
-            WritePointer(code, mov4Index, (ulong)oldSignalHandlerSegfaultPtr);
-
-            int cmpIndex = Array.IndexOf(code, 0x7100027fu);
-
-            code[cmpIndex] |= (uint)NceThreadPal.UnixSuspendSignal << 10;
-
-            return code;
+            // tmp0 already points past the matching entry, so its second slot sits at offset -8.
+            asm.Ldur(tmp0, tmp0, -8);
         }
 
-        private static void WriteTickCounterAccessFunctionPointer(uint[] code, int movIndex)
+        // Bounded variant of WriteLoadContext: emits the same table scan, but branches to lblFail once the scan
+        // pointer reaches the end of the table instead of reading past it. On success the second 8-byte slot of the
+        // matching entry is left in tmp0. tmp1, tmp2 and tmp3 are clobbered.
+        private static void WriteLoadContextSafe(Assembler asm, Operand lblFail, Operand tmp0, Operand tmp1, Operand tmp2, Operand tmp3)
         {
-            WritePointer(code, movIndex, (ulong)NceNativeInterface.GetTickCounterAccessFunctionPointer());
+            asm.Mov(tmp0, (ulong)NceThreadTable.EntriesPointer);
+            // The value stored 8 bytes before the table is scaled by the 16-byte entry size (lsl #4) to form the
+            // end pointer in tmp3 (presumably the entry count -- confirm against NceThreadTable).
+            asm.Ldur(tmp3, tmp0, -8);
+            asm.Add(tmp3, tmp0, tmp3, ArmShiftType.Lsl, 4);
+
+            // The thread identifier is read from TPIDRRO_EL0 on macOS and from TPIDR_EL0 elsewhere.
+            if (OperatingSystem.IsMacOS())
+            {
+                asm.MrsTpidrroEl0(tmp1);
+            }
+            else
+            {
+                asm.MrsTpidrEl0(tmp1);
+            }
+
+            Operand lblFound = asm.CreateLabel();
+            Operand lblLoop = asm.CreateLabel();
+
+            asm.MarkLabel(lblLoop);
+
+            // Stop (unsigned compare) as soon as the scan pointer reaches or passes the end pointer.
+            asm.Cmp(tmp0, tmp3);
+            asm.B(lblFail, ArmCondition.GeUn);
+            asm.LdrRiPost(tmp2, tmp0, 16);
+            asm.Cmp(tmp1, tmp2);
+            asm.B(lblFound, ArmCondition.Eq);
+            asm.B(lblLoop);
+
+            asm.MarkLabel(lblFound);
+
+            // tmp0 already points past the matching entry, so its second slot sits at offset -8.
+            asm.Ldur(tmp0, tmp0, -8);
         }
 
-        private static void WritePointer(uint[] code, int movIndex, ulong ptr)
+        // Fills scratchRegs with distinct, ascending register indices starting at ScratchBaseReg, skipping every
+        // register whose bit is set in blacklistedRegMask. Registers 29 and above are never handed out.
+        // Throws ArgumentException when the request cannot be satisfied below register 29.
+        private static void PickScratchRegs(Span<int> scratchRegs, uint blacklistedRegMask)
         {
-            code[movIndex] |= (uint)(ushort)ptr << 5;
-            code[movIndex + 1] |= (uint)(ushort)(ptr >> 16) << 5;
-            code[movIndex + 2] |= (uint)(ushort)(ptr >> 32) << 5;
-            code[movIndex + 3] |= (uint)(ushort)(ptr >> 48) << 5;
+            const int FirstUnavailableReg = 29;
+
+            int scratchReg = ScratchBaseReg;
+
+            for (int i = 0; i < scratchRegs.Length; i++)
+            {
+                // The scan is bounded: C# masks shift counts to 5 bits, so letting scratchReg run past 31 would
+                // re-test bit 0 onwards and could spin forever on a fully blacklisted mask.
+                while (scratchReg < FirstUnavailableReg && (blacklistedRegMask & (1u << scratchReg)) != 0)
+                {
+                    scratchReg++;
+                }
+
+                if (scratchReg >= FirstUnavailableReg)
+                {
+                    throw new ArgumentException($"No enough register for {scratchRegs.Length} scratch register, started from {ScratchBaseReg}");
+                }
+
+                scratchRegs[i] = scratchReg++;
+            }
+        }
+
+        // Builds an operand for integer register number `register`, 64-bit unless another type is given.
+        private static Operand Gpr(int register, OperandType type = OperandType.I64)
+            => new(register, RegisterType.Integer, type);
+
+        // Builds an operand for vector register number `register`, 128-bit unless another type is given.
+        private static Operand Vec(int register, OperandType type = OperandType.V128)
+            => new(register, RegisterType.Vector, type);
+
+        // Builds a 64-bit integer constant operand.
+        private static Operand Const(ulong value)
+            => Const(OperandType.I64, value);
+
+        // Builds a constant operand of the requested type.
+        private static Operand Const(OperandType type, ulong value)
+        {
+            return new(type, value);
         }
 
         private static uint GetImm26(ulong sourceAddress, ulong targetAddress)
@@ -274,6 +362,13 @@ namespace Ryujinx.Cpu.Nce
             return (uint)offsetTrunc;
         }
 
+        // Returns the signed byte distance from sourceAddress to targetAddress.
+        // Throws OverflowException when that distance does not fit in a 32-bit signed integer.
+        private static int GetOffset(ulong sourceAddress, ulong targetAddress)
+        {
+            // The unsigned difference is reinterpreted as signed so backward branches come out negative.
+            long displacement = (long)(targetAddress - sourceAddress);
+            int narrowed = checked((int)displacement);
+
+            return narrowed;
+        }
+
         private static uint[] GetCopy(uint[] code)
         {
             uint[] codeCopy = new uint[code.Length];
@@ -282,6 +377,290 @@ namespace Ryujinx.Cpu.Nce
             return codeCopy;
         }
 
+        // Emits the scaffolding around a call emitted by writeCall: saves registers, locates the thread's native
+        // context, records the current SP in the context's guest-SP slot and switches to the SP stored in its
+        // host-SP slot, invokes writeCall, then switches back to the recorded guest SP and restores registers.
+        // writeCall receives the context operand followed by the two remaining scratch operands.
+        private static void WriteManagedCall(Assembler asm, Action<Assembler, Operand, Operand, Operand> writeCall, uint blacklistedRegMask)
+        {
+            // Integer registers 0..30 minus the blacklisted ones; all 32 vector registers.
+            int savedIntRegs = (int)~blacklistedRegMask & 0x7fffffff;
+            int savedVecRegs = unchecked((int)0xffffffff);
+
+            Span<int> picked = stackalloc int[3];
+            PickScratchRegs(picked, blacklistedRegMask);
+
+            Operand ctx = Gpr(picked[0]);
+            Operand tmp = Gpr(picked[1]);
+            Operand tmp2 = Gpr(picked[2]);
+            Operand sp = Gpr(Assembler.SpRegister);
+
+            RegisterSaveRestore rsr = new(savedIntRegs, savedVecRegs, OperandType.V128);
+
+            rsr.WritePrologue(asm);
+
+            WriteLoadContext(asm, ctx, tmp, tmp2);
+
+            // Record the current SP as guest SP, then continue on the host SP.
+            asm.MovSp(tmp, sp);
+            asm.StrRiUn(tmp, ctx, NceNativeContext.GetGuestSPOffset());
+            asm.LdrRiUn(tmp, ctx, NceNativeContext.GetHostSPOffset());
+            asm.MovSp(sp, tmp);
+
+            writeCall(asm, ctx, tmp, tmp2);
+
+            // Return to the guest SP recorded above before popping the saved registers.
+            asm.LdrRiUn(tmp, ctx, NceNativeContext.GetGuestSPOffset());
+            asm.MovSp(sp, tmp);
+
+            rsr.WriteEpilogue(asm);
+        }
+
+        // Generates the thread entry trampoline. With the native context pointer in x0 it saves the managed
+        // callee-saved registers, records the current SP in the context's host-SP slot, loads the integer and
+        // vector registers and SP from the context, clears the in-managed word and branches to the loaded x30.
+        public static uint[] GenerateThreadStartCode()
+        {
+            Assembler asm = new();
+
+            Operand ctx = Gpr(0);
+            Operand tmp = Gpr(1);
+            Operand sp = Gpr(Assembler.SpRegister);
+
+            CreateRegisterSaveRestoreForManaged().WritePrologue(asm);
+
+            asm.MovSp(tmp, sp);
+            asm.StrRiUn(tmp, ctx, NceNativeContext.GetHostSPOffset());
+
+            // x2..x29 in pairs; x0/x1 are still in use and are loaded last.
+            for (int reg = 2; reg <= 28; reg += 2)
+            {
+                asm.LdpRiUn(Gpr(reg), Gpr(reg + 1), ctx, NceNativeContext.GetXOffset(reg));
+            }
+
+            // v0..v31 in pairs.
+            for (int reg = 0; reg <= 30; reg += 2)
+            {
+                asm.LdpRiUn(Vec(reg), Vec(reg + 1), ctx, NceNativeContext.GetVOffset(reg));
+            }
+
+            // x30 and the slot that follows it (moved into SP) are loaded together.
+            asm.LdpRiUn(Gpr(30), tmp, ctx, NceNativeContext.GetXOffset(30));
+            asm.MovSp(sp, tmp);
+
+            asm.StrRiUn(Gpr(Assembler.ZrRegister, OperandType.I32), ctx, NceNativeContext.GetInManagedOffset());
+
+            // Overwrites the context pointer itself, so this must be the final context access.
+            asm.LdpRiUn(ctx, tmp, ctx, NceNativeContext.GetXOffset(0));
+            asm.Br(Gpr(30));
+
+            return asm.GetCode();
+        }
+
+        // Generates the signal handler used for thread suspension. It looks up the calling thread's native context
+        // (bailing out if the thread is not in the table), verifies that the signal number in w0 is
+        // NceThreadPal.UnixSuspendSignal, switches to the host stack, marks the thread as in managed code and calls
+        // the suspend handler, repeating the call while further interrupts are pending. Both exits restore the
+        // saved registers and return through x30.
+        public static uint[] GenerateSuspendExceptionHandler()
+        {
+            Assembler asm = new();
+
+            Span<int> scratchRegs = stackalloc int[4];
+            PickScratchRegs(scratchRegs, 0u);
+
+            RegisterSaveRestore rsr = new((1 << scratchRegs[0]) | (1 << scratchRegs[1]) | (1 << scratchRegs[2]) | (1 << scratchRegs[3]), hasCall: true);
+
+            rsr.WritePrologue(asm);
+
+            Operand lblAgain = asm.CreateLabel();
+            Operand lblFail = asm.CreateLabel();
+
+            WriteLoadContextSafe(asm, lblFail, Gpr(scratchRegs[0]), Gpr(scratchRegs[1]), Gpr(scratchRegs[2]), Gpr(scratchRegs[3]));
+
+            // Check the signal number BEFORE switching stacks: the failure path runs the epilogue directly, so SP
+            // must still be the one the prologue ran on when we branch to lblFail.
+            asm.Cmp(Gpr(0, OperandType.I32), Const((ulong)NceThreadPal.UnixSuspendSignal));
+            asm.B(lblFail, ArmCondition.Ne);
+
+            // Keep the signal stack pointer in scratchRegs[2] and continue on the host stack.
+            asm.LdrRiUn(Gpr(scratchRegs[1]), Gpr(scratchRegs[0]), NceNativeContext.GetHostSPOffset());
+            asm.MovSp(Gpr(scratchRegs[2]), Gpr(Assembler.SpRegister));
+            asm.MovSp(Gpr(Assembler.SpRegister), Gpr(scratchRegs[1]));
+
+            // SigUsr2
+
+            asm.Mov(Gpr(scratchRegs[1], OperandType.I32), Const(OperandType.I32, 1));
+            asm.StrRiUn(Gpr(scratchRegs[1], OperandType.I32), Gpr(scratchRegs[0]), NceNativeContext.GetInManagedOffset());
+
+            asm.MarkLabel(lblAgain);
+
+            asm.Mov(Gpr(scratchRegs[3]), (ulong)NceNativeInterface.GetSuspendThreadHandlerFunctionPointer());
+            asm.Blr(Gpr(scratchRegs[3]));
+
+            // TODO: Check return value, exit if we must.
+
+            WriteInManagedLockReleaseForSuspendHandler(asm, Gpr(scratchRegs[0]), Gpr(scratchRegs[1]), Gpr(scratchRegs[3]), lblAgain);
+
+            // Back to the stack the prologue pushed onto before restoring registers.
+            asm.MovSp(Gpr(Assembler.SpRegister), Gpr(scratchRegs[2]));
+
+            rsr.WriteEpilogue(asm);
+
+            asm.Ret(Gpr(30));
+
+            asm.MarkLabel(lblFail);
+
+            rsr.WriteEpilogue(asm);
+
+            asm.Ret(Gpr(30));
+
+            return asm.GetCode();
+        }
+
+        // Generates the fault-signal wrapper. For threads found in the thread table it switches to the host
+        // stack, takes the in-managed lock, calls signalHandlerPtr, releases the lock and returns through x30.
+        // For any other thread it restores the saved registers and tail-branches to oldSignalHandlerSegfaultPtr.
+        public static uint[] GenerateWrapperExceptionHandler(IntPtr oldSignalHandlerSegfaultPtr, IntPtr signalHandlerPtr)
+        {
+            Assembler asm = new();
+
+            Span<int> picked = stackalloc int[4];
+            PickScratchRegs(picked, 0u);
+
+            int savedMask = 0;
+
+            foreach (int reg in picked)
+            {
+                savedMask |= 1 << reg;
+            }
+
+            Operand ctx = Gpr(picked[0]);
+            Operand tmp = Gpr(picked[1]);
+            Operand signalSp = Gpr(picked[2]);
+            Operand tmp2 = Gpr(picked[3]);
+            Operand sp = Gpr(Assembler.SpRegister);
+
+            RegisterSaveRestore rsr = new(savedMask, hasCall: true);
+
+            rsr.WritePrologue(asm);
+
+            Operand lblFail = asm.CreateLabel();
+
+            WriteLoadContextSafe(asm, lblFail, ctx, tmp, signalSp, tmp2);
+
+            // Remember the signal stack pointer and continue on the host stack.
+            asm.LdrRiUn(tmp, ctx, NceNativeContext.GetHostSPOffset());
+            asm.MovSp(signalSp, sp);
+            asm.MovSp(sp, tmp);
+
+            // SigSegv
+
+            WriteInManagedLockAcquire(asm, ctx, tmp, tmp2);
+
+            asm.Mov(tmp2, (ulong)signalHandlerPtr);
+            asm.Blr(tmp2);
+
+            WriteInManagedLockRelease(asm, ctx, tmp, tmp2, ThreadExitMethod.None);
+
+            asm.MovSp(sp, signalSp);
+
+            rsr.WriteEpilogue(asm);
+
+            asm.Ret(Gpr(30));
+
+            asm.MarkLabel(lblFail);
+
+            rsr.WriteEpilogue(asm);
+
+            // Hand the signal over to the previously installed handler; x3 is used as the branch target.
+            Operand forwardTarget = Gpr(3);
+
+            asm.Mov(forwardTarget, (ulong)oldSignalHandlerSegfaultPtr);
+            asm.Br(forwardTarget);
+
+            return asm.GetCode();
+        }
+
+        // Emits a spin loop that atomically changes the in-managed word of the context from 0 to 1 using an
+        // exclusive load/store pair. ctx holds the native context pointer; tmp and tmp2 are clobbered.
+        private static void WriteInManagedLockAcquire(Assembler asm, Operand ctx, Operand tmp, Operand tmp2)
+        {
+            // 32-bit views of the two temporaries (same register indices).
+            Operand tmpUint = new Operand(tmp.GetRegister().Index, RegisterType.Integer, OperandType.I32);
+            Operand tmp2Uint = new Operand(tmp2.GetRegister().Index, RegisterType.Integer, OperandType.I32);
+
+            Operand lblLoop = asm.CreateLabel();
+
+            // Bit 0 set means that the thread is currently executing managed code (that case should be impossible here).
+            // Bit 1 being set means there is a signal pending, we should wait for the signal, otherwise it could trigger
+            // while running managed code.
+
+            asm.MarkLabel(lblLoop);
+
+            // The address is recomputed on every iteration because the store status below is written to the
+            // 32-bit view of the same register.
+            // NOTE(review): the Arm architecture appears to treat a store-exclusive whose status register overlaps
+            // its base register as CONSTRAINED UNPREDICTABLE -- confirm this is acceptable, or use a third temporary.
+            asm.Add(tmp, ctx, Const((ulong)NceNativeContext.GetInManagedOffset()));
+            asm.Ldaxr(tmp2Uint, tmp);
+            asm.Cbnz(tmp2Uint, lblLoop);
+            asm.Mov(tmp2Uint, Const(OperandType.I32, 1));
+            asm.Stlxr(tmp2Uint, tmp, tmpUint);
+            asm.Cbnz(tmpUint, lblLoop); // Retry if store failed.
+        }
+
+        // Selects what WriteInManagedLockRelease emits after the interrupt handler has been called.
+        private enum ThreadExitMethod
+        {
+            // Always loop back and try to release the lock again.
+            None = 0,
+
+            // When w0 is zero, emit the managed register epilogue followed by a return.
+            GenerateReturn = 1,
+
+            // When w0 is zero, branch to the label supplied by the caller.
+            Label = 2,
+        }
+
+        // Emits code that atomically clears the in-managed word of the context. If the word equals 3 (an interrupt
+        // was requested while in managed code) it is set back to 1, the suspend handler is called, and the release
+        // is retried or the thread exits according to exitMethod. ctx holds the native context pointer; tmp and
+        // tmp2 are clobbered. lblQuit is only used with ThreadExitMethod.Label.
+        private static void WriteInManagedLockRelease(Assembler asm, Operand ctx, Operand tmp, Operand tmp2, ThreadExitMethod exitMethod, Operand lblQuit = default)
+        {
+            // 32-bit views of the two temporaries (same register indices).
+            Operand tmpUint = new Operand(tmp.GetRegister().Index, RegisterType.Integer, OperandType.I32);
+            Operand tmp2Uint = new Operand(tmp2.GetRegister().Index, RegisterType.Integer, OperandType.I32);
+
+            Operand lblLoop = asm.CreateLabel();
+            Operand lblInterrupt = asm.CreateLabel();
+            Operand lblDone = asm.CreateLabel();
+
+            // Bit 0 set means that the thread is currently executing managed code (it should be always set here, as we just returned from managed code).
+            // Bit 1 being set means an interrupt was requested while it was in managed code, we should service it.
+
+            asm.MarkLabel(lblLoop);
+
+            // The address is recomputed on every iteration because tmp is overwritten below, both by the store
+            // status (32-bit view of the same register) and by the handler pointer.
+            // NOTE(review): the Arm architecture appears to treat a store-exclusive whose status register overlaps
+            // its base register as CONSTRAINED UNPREDICTABLE -- confirm this is acceptable.
+            asm.Add(tmp, ctx, Const((ulong)NceNativeContext.GetInManagedOffset()));
+            asm.Ldaxr(tmp2Uint, tmp);
+            asm.Cmp(tmp2Uint, Const(OperandType.I32, 3));
+            asm.B(lblInterrupt, ArmCondition.Eq);
+            asm.Stlxr(Gpr(Assembler.ZrRegister, OperandType.I32), tmp, tmpUint);
+            asm.Cbnz(tmpUint, lblLoop); // Retry if store failed.
+            asm.B(lblDone);
+
+            asm.MarkLabel(lblInterrupt);
+
+            // If we got here, an interrupt was requested while it was in managed code.
+            // Let's service the interrupt and check what we should do next.
+
+            asm.Mov(tmp2Uint, Const(OperandType.I32, 1));
+            asm.Stlxr(tmp2Uint, tmp, tmpUint);
+            asm.Cbnz(tmpUint, lblLoop); // Retry if store failed.
+            asm.Mov(tmp, (ulong)NceNativeInterface.GetSuspendThreadHandlerFunctionPointer());
+            asm.Blr(tmp);
+
+            // The return value from the interrupt handler indicates if we should continue running.
+            // From here, we either try to release the lock again. We might have received another interrupt
+            // request in the meantime, in which case we should service it again.
+            // If we were requested to exit, then we exit if we can.
+            // TODO: We should also exit while on a signal handler. To do that we need to modify the PC value on the
+            // context. It's a bit more tricky to do, so for now we ignore that case with "ThreadExitMethod.None".
+
+            if (exitMethod == ThreadExitMethod.None)
+            {
+                asm.B(lblLoop);
+            }
+            else
+            {
+                // A non-zero w0 from the handler means keep running: retry the release.
+                asm.Cbnz(Gpr(0, OperandType.I32), lblLoop);
+
+                if (exitMethod == ThreadExitMethod.Label)
+                {
+                    asm.B(lblQuit);
+                }
+                else if (exitMethod == ThreadExitMethod.GenerateReturn)
+                {
+                    CreateRegisterSaveRestoreForManaged().WriteEpilogue(asm);
+
+                    asm.Ret(Gpr(30));
+                }
+            }
+
+            asm.MarkLabel(lblDone);
+        }
+
+        // Variant of WriteInManagedLockRelease used inside the suspend signal handler: instead of calling the
+        // suspend handler itself when an interrupt is pending (word equals 3), it sets the word back to 1 and
+        // branches to lblAgain so the caller's own handler call is repeated. ctx holds the native context pointer;
+        // tmp and tmp2 are clobbered.
+        private static void WriteInManagedLockReleaseForSuspendHandler(Assembler asm, Operand ctx, Operand tmp, Operand tmp2, Operand lblAgain)
+        {
+            // 32-bit views of the two temporaries (same register indices).
+            Operand tmpUint = new Operand(tmp.GetRegister().Index, RegisterType.Integer, OperandType.I32);
+            Operand tmp2Uint = new Operand(tmp2.GetRegister().Index, RegisterType.Integer, OperandType.I32);
+
+            Operand lblLoop = asm.CreateLabel();
+            Operand lblInterrupt = asm.CreateLabel();
+            Operand lblDone = asm.CreateLabel();
+
+            // Bit 0 set means that the thread is currently executing managed code (it should be always set here, as we just returned from managed code).
+            // Bit 1 being set means an interrupt was requested while it was in managed code, we should service it.
+
+            asm.MarkLabel(lblLoop);
+
+            // The address is recomputed on every iteration because the store status below is written to the
+            // 32-bit view of the same register.
+            asm.Add(tmp, ctx, Const((ulong)NceNativeContext.GetInManagedOffset()));
+            asm.Ldaxr(tmp2Uint, tmp);
+            asm.Cmp(tmp2Uint, Const(OperandType.I32, 3));
+            asm.B(lblInterrupt, ArmCondition.Eq);
+            asm.Stlxr(Gpr(Assembler.ZrRegister, OperandType.I32), tmp, tmpUint);
+            asm.Cbnz(tmpUint, lblLoop); // Retry if store failed.
+            asm.B(lblDone);
+
+            asm.MarkLabel(lblInterrupt);
+
+            // If we got here, an interrupt was requested while it was in managed code.
+            // Let's service the interrupt and check what we should do next.
+
+            asm.Mov(tmp2Uint, Const(OperandType.I32, 1));
+            asm.Stlxr(tmp2Uint, tmp, tmpUint);
+            asm.Cbnz(tmpUint, lblLoop); // Retry if store failed.
+            asm.B(lblAgain);
+
+            asm.MarkLabel(lblDone);
+        }
+
+        // Builds the save/restore helper used around managed code: the callee-saved integer mask, the
+        // callee-saved FP mask saved as 64-bit values, with the call flag set.
+        private static RegisterSaveRestore CreateRegisterSaveRestoreForManaged()
+        {
+            int intMask = (int)IntCalleeSavedRegsMask;
+            int fpMask = unchecked((int)FpCalleeSavedRegsMask);
+
+            return new(intMask, fpMask, OperandType.FP64, hasCall: true);
+        }
+
         private static void WriteCode(ICpuMemoryManager memoryManager, ulong address, uint[] code)
         {
             for (int i = 0; i < code.Length; i++)
diff --git a/src/Ryujinx.Cpu/Nce/nce.S b/src/Ryujinx.Cpu/Nce/nce.S
deleted file mode 100644
index bf60676af..000000000
--- a/src/Ryujinx.Cpu/Nce/nce.S
+++ /dev/null
@@ -1,378 +0,0 @@
-.text
-
-.macro longmov0 reg
-mov \reg, #0
-movk \reg, #0, lsl #16
-movk \reg, #0, lsl #32
-movk \reg, #0, lsl #48
-.endm
-
-// r1 = EntriesPointer
-// r2 = current_thread_id_local
-// r3 = expected_thread_id
-// r4 = ThreadsCount_local
-.macro loadctxptr_reg r1, r2, r3
-longmov0 \r1
-mrs \r2, tpidr_el0
-1:
-ldr \r3, [\r1], #16
-cmp \r2, \r3
-beq 2f
-b 1b
-2:
-ldr \r1, [\r1, #-8]
-.endm
-
-.macro loadctxptr
-loadctxptr_reg x19, x20, x21
-.endm
-
-.global GetTpidrEl0Code
-GetTpidrEl0Code:
-mrs x0, tpidr_el0
-ret
-
-.global ThreadStartCode
-ThreadStartCode:
-stp x19, x20, [sp, #-0x120]!
-stp x21, x22, [sp, #0x10]
-stp x23, x24, [sp, #0x20]
-stp x25, x26, [sp, #0x30]
-stp x27, x28, [sp, #0x40]
-stp x29, x30, [sp, #0x50]
-stp d8, d9, [sp, #0x60]
-stp d10, d11, [sp, #0x70]
-stp d12, d13, [sp, #0x80]
-stp d14, d15, [sp, #0x90]
-stp d16, d17, [sp, #0xA0]
-stp d18, d19, [sp, #0xB0]
-stp d20, d21, [sp, #0xC0]
-stp d22, d23, [sp, #0xD0]
-stp d24, d25, [sp, #0xE0]
-stp d26, d27, [sp, #0xF0]
-stp d28, d29, [sp, #0x100]
-stp d30, d31, [sp, #0x110]
-str wzr, [x0, #0x31C]
-mov x1, sp
-str x1, [x0, #0x320]
-ldp x2, x3, [x0, #0x10]
-ldp x4, x5, [x0, #0x20]
-ldp x6, x7, [x0, #0x30]
-ldp x8, x9, [x0, #0x40]
-ldp x10, x11, [x0, #0x50]
-ldp x12, x13, [x0, #0x60]
-ldp x14, x15, [x0, #0x70]
-ldp x16, x17, [x0, #0x80]
-ldp x18, x19, [x0, #0x90]
-ldp x20, x21, [x0, #0xA0]
-ldp x22, x23, [x0, #0xB0]
-ldp x24, x25, [x0, #0xC0]
-ldp x26, x27, [x0, #0xD0]
-ldp x28, x29, [x0, #0xE0]
-ldp q0, q1, [x0, #0x100]
-ldp q2, q3, [x0, #0x120]
-ldp q4, q5, [x0, #0x140]
-ldp q6, q7, [x0, #0x160]
-ldp q8, q9, [x0, #0x180]
-ldp q10, q11, [x0, #0x1A0]
-ldp q12, q13, [x0, #0x1C0]
-ldp q14, q15, [x0, #0x1E0]
-ldp q16, q17, [x0, #0x200]
-ldp q18, q19, [x0, #0x220]
-ldp q20, q21, [x0, #0x240]
-ldp q22, q23, [x0, #0x260]
-ldp q24, q25, [x0, #0x280]
-ldp q26, q27, [x0, #0x2A0]
-ldp q28, q29, [x0, #0x2C0]
-ldp q30, q31, [x0, #0x2E0]
-ldp x30, x1, [x0, #0xF0]
-mov sp, x1
-ldp x0, x1, [x0, #0x0]
-br x30
-
-// Inputs
-// r1 = EntriesPointer
-// r2 = current_thread_id_local
-// r3 = expected_thread_id
-// r4 = EntriesPointerEnd
-
-// Outputs
-// r1 = EntryPointer or 0x0 on not found
-.macro loadctxptr_safe_reg r1, r2, r3, r4
-longmov0 \r1
-ldr \r4, [\r1, #-8]
-add \r4, \r1, \r4, lsl #4
-mrs \r2, tpidr_el0
-1:
-cmp \r1, \r4
-beq 2f
-ldr \r3, [\r1], #16
-cmp \r2, \r3
-beq 3f
-b 1b
-2:
-mov \r1, 0x0
-b 4f
-3:
-ldr \r1, [\r1, #-8]
-4:
-.endm
-
-.global ExceptionHandlerEntryCode
-ExceptionHandlerEntryCode:
-stp x19, x20, [sp, #-0x40]!
-stp x21, x22, [sp, #0x10]
-stp x23, x24, [sp, #0x20]
-str x25, [sp, #0x30]
-// signo
-mov x19, x0
-// siginfo_t *si
-mov x20, x1
-// void *thread_id
-mov x21, x2
-mov x22, sp
-mov x23, x30
-// x24 = EntriesPointer
-// x1 = si
-// x2 = thread_id
-loadctxptr_safe_reg x24, x1, x2, x25
-cbz x24, 4f
-ldr x0, [x24, 0x320]
-mov sp, x0
-cmp w19, #0
-beq 1f
-mov w0, #1
-str w0, [x24, 0x31C]
-mov x0, x19
-mov x1, x20
-mov x2, x21
-mov x8, #0
-movk x8, #0, lsl #16
-movk x8, #0, lsl #32
-movk x8, #0, lsl #48
-blr x8
-b 2f
-1:
-ldr w0, [x24, 0x31C]
-cbnz w0, 3f
-mov w0, #1
-str w0, [x24, 0x31C]
-mov x0, #0
-movk x0, #0, lsl #16
-movk x0, #0, lsl #32
-movk x0, #0, lsl #48
-blr x0
-2:
-str wzr, [x24, 0x31C]
-3:
-mov sp, x22
-mov x30, x23
-ldp x21, x22, [sp, #0x10]
-ldp x23, x24, [sp, #0x20]
-ldp x25, x26, [sp, #0x30]
-ldp x19, x20, [sp], #0x40
-ret
-4:
-// ThreadId is invalid, forward to other handler.
-mov x0, x19
-mov x1, x20
-mov x2, x21
-mov sp, x22
-ldp x21, x22, [sp, #0x10]
-ldp x23, x24, [sp, #0x20]
-ldr x25, [sp, #0x30]
-ldp x19, x20, [sp], #0x40
-longmov0 x3
-br x3
-
-.global SvcPatchCode
-SvcPatchCode:
-
-stp x19, x20, [sp, #-0x20]!
-str x21, [sp, #0x10]
-loadctxptr
-stp x0, x1, [x19, #0x0]
-stp x2, x3, [x19, #0x10]
-stp x4, x5, [x19, #0x20]
-stp x6, x7, [x19, #0x30]
-stp x8, x9, [x19, #0x40]
-stp x10, x11, [x19, #0x50]
-stp x12, x13, [x19, #0x60]
-stp x14, x15, [x19, #0x70]
-stp x16, x17, [x19, #0x80]
-ldr x21, [sp, #0x10]
-ldp x0, x20, [sp], #0x20
-stp x18, x0, [x19, #0x90]
-stp x20, x21, [x19, #0xA0]
-stp x22, x23, [x19, #0xB0]
-stp x24, x25, [x19, #0xC0]
-stp x26, x27, [x19, #0xD0]
-stp x28, x29, [x19, #0xE0]
-mov x0, sp
-stp x30, x0, [x19, #0xF0]
-stp q0, q1, [x19, #0x100]
-stp q2, q3, [x19, #0x120]
-stp q4, q5, [x19, #0x140]
-stp q6, q7, [x19, #0x160]
-stp q8, q9, [x19, #0x180]
-stp q10, q11, [x19, #0x1A0]
-stp q12, q13, [x19, #0x1C0]
-stp q14, q15, [x19, #0x1E0]
-stp q16, q17, [x19, #0x200]
-stp q18, q19, [x19, #0x220]
-stp q20, q21, [x19, #0x240]
-stp q22, q23, [x19, #0x260]
-stp q24, q25, [x19, #0x280]
-stp q26, q27, [x19, #0x2A0]
-stp q28, q29, [x19, #0x2C0]
-stp q30, q31, [x19, #0x2E0]
-ldr x0, [x19, #0x320]
-mov sp, x0
-mov w0, #1
-str w0, [x19, #0x31C]
-mov w0, #0
-ldr x8, [x19, #0x350]
-blr x8
-cbnz w0, 1f
-ldp d30, d31, [sp, #0x110]
-ldp d28, d29, [sp, #0x100]
-ldp d26, d27, [sp, #0xF0]
-ldp d24, d25, [sp, #0xE0]
-ldp d22, d23, [sp, #0xD0]
-ldp d20, d21, [sp, #0xC0]
-ldp d18, d19, [sp, #0xB0]
-ldp d16, d17, [sp, #0xA0]
-ldp d14, d15, [sp, #0x90]
-ldp d12, d13, [sp, #0x80]
-ldp d10, d11, [sp, #0x70]
-ldp d8, d9, [sp, #0x60]
-ldp x29, x30, [sp, #0x50]
-ldp x27, x28, [sp, #0x40]
-ldp x25, x26, [sp, #0x30]
-ldp x23, x24, [sp, #0x20]
-ldp x21, x22, [sp, #0x10]
-ldp x19, x20, [sp], #0x120
-ret
-1:
-str wzr, [x19, #0x31C]
-ldp x30, x0, [x19, #0xF0]
-mov sp, x0
-ldp x0, x1, [x19, #0x0]
-ldp x2, x3, [x19, #0x10]
-ldp x4, x5, [x19, #0x20]
-ldp x6, x7, [x19, #0x30]
-ldp x8, x9, [x19, #0x40]
-ldp x10, x11, [x19, #0x50]
-ldp x12, x13, [x19, #0x60]
-ldp x14, x15, [x19, #0x70]
-ldp x16, x17, [x19, #0x80]
-ldr x18, [x19, #0x90]
-ldp x20, x21, [x19, #0xA0]
-ldp x22, x23, [x19, #0xB0]
-ldp x24, x25, [x19, #0xC0]
-ldp x26, x27, [x19, #0xD0]
-ldp x28, x29, [x19, #0xE0]
-ldp q0, q1, [x19, #0x100]
-ldp q2, q3, [x19, #0x120]
-ldp q4, q5, [x19, #0x140]
-ldp q6, q7, [x19, #0x160]
-ldp q8, q9, [x19, #0x180]
-ldp q10, q11, [x19, #0x1A0]
-ldp q12, q13, [x19, #0x1C0]
-ldp q14, q15, [x19, #0x1E0]
-ldp q16, q17, [x19, #0x200]
-ldp q18, q19, [x19, #0x220]
-ldp q20, q21, [x19, #0x240]
-ldp q22, q23, [x19, #0x260]
-ldp q24, q25, [x19, #0x280]
-ldp q26, q27, [x19, #0x2A0]
-ldp q28, q29, [x19, #0x2C0]
-ldp q30, q31, [x19, #0x2E0]
-ldr x19, [x19, #0x98]
-b #0
-
-.global MrsTpidrroEl0PatchCode
-MrsTpidrroEl0PatchCode:
-stp xzr, x19, [sp, #-0x20]!
-stp x20, x21, [sp, #0x10]
-loadctxptr
-ldr x19, [x19, #0x308]
-str x19, [sp]
-ldp x20, x21, [sp, #0x10]
-ldr x19, [sp, #8]
-ldr x0, [sp], #0x20
-b #0
-
-.global MrsTpidrEl0PatchCode
-MrsTpidrEl0PatchCode:
-stp xzr, x19, [sp, #-0x20]!
-stp x20, x21, [sp, #0x10]
-loadctxptr
-ldr x19, [x19, #0x300]
-str x19, [sp]
-ldp x20, x21, [sp, #0x10]
-ldr x19, [sp, #8]
-ldr x0, [sp], #0x20
-b #0
-
-.global MrsCtrEl0PatchCode
-MrsCtrEl0PatchCode:
-stp xzr, x19, [sp, #-0x20]!
-stp x20, x21, [sp, #0x10]
-loadctxptr
-ldr x19, [x19, #0x338]
-str x19, [sp]
-ldp x20, x21, [sp, #0x10]
-ldr x19, [sp, #8]
-ldr x0, [sp], #0x20
-b #0
-
-.global MsrTpidrEl0PatchCode
-MsrTpidrEl0PatchCode:
-stp x19, x0, [sp, #-0x20]!
-stp x20, x21, [sp, #0x10]
-loadctxptr
-ldr x20, [sp, #8]
-str x20, [x19, #0x300]
-ldp x20, x21, [sp, #0x10]
-ldr x19, [sp], #0x20
-b #0
-
-.global MrsCntpctEl0PatchCode
-MrsCntpctEl0PatchCode:
-stp x0, x1, [sp, #-0xC0]!
-stp x2, x3, [sp, #0x10]
-stp x4, x5, [sp, #0x20]
-stp x6, x7, [sp, #0x30]
-stp x8, x9, [sp, #0x40]
-stp x10, x11, [sp, #0x50]
-stp x12, x13, [sp, #0x60]
-stp x14, x15, [sp, #0x70]
-stp x16, x17, [sp, #0x80]
-stp x18, x19, [sp, #0x90]
-stp x20, x21, [sp, #0xA0]
-str x30, [sp, #0xB8]
-loadctxptr
-mov w0, #1
-str w0, [x19, #0x31C]
-mov x0, #0
-movk x0, #0, lsl #16
-movk x0, #0, lsl #32
-movk x0, #0, lsl #48
-blr x0
-str wzr, [x19, #0x31C]
-str x0, [sp, #0xB0]
-ldr x30, [sp, #0xB8]
-ldp x20, x21, [sp, #0xA0]
-ldp x18, x19, [sp, #0x90]
-ldp x16, x17, [sp, #0x80]
-ldp x14, x15, [sp, #0x70]
-ldp x12, x13, [sp, #0x60]
-ldp x10, x11, [sp, #0x50]
-ldp x8, x9, [sp, #0x40]
-ldp x6, x7, [sp, #0x30]
-ldp x4, x5, [sp, #0x20]
-ldp x2, x3, [sp, #0x10]
-ldp x0, x1, [sp], #0xB0
-ldr x0, [sp], #0x10
-b #0
diff --git a/src/Ryujinx.Cpu/Signal/NativeSignalHandler.cs b/src/Ryujinx.Cpu/Signal/NativeSignalHandler.cs
index bcd0fc1a7..531a0a56a 100644
--- a/src/Ryujinx.Cpu/Signal/NativeSignalHandler.cs
+++ b/src/Ryujinx.Cpu/Signal/NativeSignalHandler.cs
@@ -70,7 +70,7 @@ namespace Ryujinx.Cpu.Signal
             config = new SignalHandlerConfig();
         }
 
-        public static void InitializeSignalHandler(ulong pageSize, Func<IntPtr, IntPtr, IntPtr> customSignalHandlerFactory = null, int userSignal = -1)
+        public static void InitializeSignalHandler(ulong pageSize, Func<IntPtr, IntPtr, IntPtr> customSignalHandlerFactory = null)
         {
             if (_initialized)
             {
@@ -107,14 +107,14 @@ namespace Ryujinx.Cpu.Signal
 
                     if (Ryujinx.Common.SystemInfo.SystemInfo.IsAndroid())
                     {
-                        var old = AndroidSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr, userSignal);
+                        var old = AndroidSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr);
 
                         config.UnixOldSigaction = (nuint)(ulong)old.sa_handler;
                         config.UnixOldSigaction3Arg = old.sa_flags & 4;
                     }
                     else
                     {
-                        var old = UnixSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr, userSignal);
+                        var old = UnixSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr);
 
                         config.UnixOldSigaction = (nuint)(ulong)old.sa_handler;
                         config.UnixOldSigaction3Arg = old.sa_flags & 4;
@@ -139,6 +139,21 @@ namespace Ryujinx.Cpu.Signal
             }
         }
 
+        // Forwards to UnixSignalHandlerRegistration.RegisterAlternateStack with the given stack pointer and size.
+        public static void InstallUnixAlternateStackForCurrentThread(IntPtr stackPtr, ulong stackSize)
+            => UnixSignalHandlerRegistration.RegisterAlternateStack(stackPtr, stackSize);
+
+        // Forwards to UnixSignalHandlerRegistration.UnregisterAlternateStack.
+        public static void UninstallUnixAlternateStackForCurrentThread()
+            => UnixSignalHandlerRegistration.UnregisterAlternateStack();
+
+        // Forwards to UnixSignalHandlerRegistration.RegisterExceptionHandler for signal sigNum with handler action.
+        public static void InstallUnixSignalHandler(int sigNum, IntPtr action)
+            => UnixSignalHandlerRegistration.RegisterExceptionHandler(sigNum, action);
+
         private static IntPtr MapCode(ReadOnlySpan<byte> code)
         {
             Debug.Assert(_codeBlock == null);
diff --git a/src/Ryujinx.Cpu/Signal/UnixSignalHandlerRegistration.cs b/src/Ryujinx.Cpu/Signal/UnixSignalHandlerRegistration.cs
index 1d9f514eb..ca4da38db 100644
--- a/src/Ryujinx.Cpu/Signal/UnixSignalHandlerRegistration.cs
+++ b/src/Ryujinx.Cpu/Signal/UnixSignalHandlerRegistration.cs
@@ -21,8 +21,7 @@ namespace Ryujinx.Cpu.Signal
             public IntPtr sa_restorer;
         }
 
-        [SupportedOSPlatform("android")]
-        [StructLayout(LayoutKind.Sequential, Pack = 8)]
+        [SupportedOSPlatform("android"), StructLayout(LayoutKind.Sequential, Pack = 8)]
         public struct SigActionBionic
         {
             public int sa_flags;
@@ -31,27 +30,38 @@ namespace Ryujinx.Cpu.Signal
             public IntPtr sa_restorer;
         }
 
+        // Managed mirror of the native structure passed to sigaltstack (see the import below).
+        // NOTE(review): the field order (sp, flags, size) matches the Linux stack_t; macOS appears to declare
+        // stack_t as (sp, size, flags) -- confirm before using this struct on macOS.
+        [StructLayout(LayoutKind.Sequential, Pack = 8)]
+        public struct Stack
+        {
+            // Base address of the alternate stack.
+            public IntPtr ss_sp;
+            // Flags word (SS_DISABLE is declared alongside the other constants below).
+            public int ss_flags;
+            // Size of the alternate stack, stored as a pointer-sized integer.
+            public IntPtr ss_size;
+        }
+
         private const int SIGSEGV = 11;
         private const int SIGBUS = 10;
         private const int SA_SIGINFO = 0x00000004;
+        private const int SA_ONSTACK = 0x08000000;
+        private const int SS_DISABLE = 2;
 
         [LibraryImport("libc", SetLastError = true)]
         private static partial int sigaction(int signum, ref SigAction sigAction, out SigAction oldAction);
 
-        [SupportedOSPlatform("android")]
-        [LibraryImport("libc", SetLastError = true)]
+        [SupportedOSPlatform("android"), LibraryImport("libc", SetLastError = true)]
         private static partial int sigaction(int signum, ref SigActionBionic sigAction, out SigActionBionic oldAction);
 
         [LibraryImport("libc", SetLastError = true)]
         private static partial int sigaction(int signum, IntPtr sigAction, out SigAction oldAction);
 
-        [SupportedOSPlatform("android")]
-        [LibraryImport("libc", SetLastError = true)]
+        [SupportedOSPlatform("android"), LibraryImport("libc", SetLastError = true)]
         private static partial int sigaction(int signum, IntPtr sigAction, out SigActionBionic oldAction);
 
         [LibraryImport("libc", SetLastError = true)]
         private static partial int sigemptyset(ref SigSet set);
 
+        // libc sigaltstack binding: ss describes the alternate signal stack to install and oldSs is filled by the
+        // call. The last P/Invoke error is captured (SetLastError = true).
+        [LibraryImport("libc", SetLastError = true)]
+        private static partial int sigaltstack(ref Stack ss, out Stack oldSs);
+
         public static SigAction GetSegfaultExceptionHandler()
         {
             int result;
@@ -76,13 +86,13 @@ namespace Ryujinx.Cpu.Signal
 
             if (result != 0)
             {
-                throw new InvalidOperationException($"Could not get SIGSEGV sigaction. Error: {result}");
+                throw new SystemException($"Could not get SIGSEGV sigaction. Error: {Marshal.GetLastPInvokeErrorMessage()}");
             }
 
             return old;
         }
 
-        public static SigAction RegisterExceptionHandler(IntPtr action, int userSignal = -1)
+        public static SigAction RegisterExceptionHandler(IntPtr action)
         {
             int result;
             SigAction old;
@@ -106,21 +116,6 @@ namespace Ryujinx.Cpu.Signal
                     sa_flags = tmp.sa_flags,
                     sa_restorer = tmp.sa_restorer
                 };
-
-                if (userSignal != -1)
-                {
-                    result = sigaction(userSignal, ref sig, out SigActionBionic oldu);
-
-                    if (oldu.sa_handler != IntPtr.Zero)
-                    {
-                        throw new InvalidOperationException($"SIG{userSignal} is already in use.");
-                    }
-
-                    if (result != 0)
-                    {
-                        throw new InvalidOperationException($"Could not register SIG{userSignal} sigaction. Error: {result}");
-                    }
-                }
             }
             else
             {
@@ -136,7 +131,7 @@ namespace Ryujinx.Cpu.Signal
 
                 if (result != 0)
                 {
-                    throw new InvalidOperationException($"Could not register SIGSEGV sigaction. Error: {result}");
+                    throw new SystemException($"Could not register SIGSEGV sigaction. Error: {Marshal.GetLastPInvokeErrorMessage()}");
                 }
 
                 if (OperatingSystem.IsMacOS())
@@ -145,22 +140,7 @@ namespace Ryujinx.Cpu.Signal
 
                     if (result != 0)
                     {
-                        throw new InvalidOperationException($"Could not register SIGBUS sigaction. Error: {result}");
-                    }
-                }
-
-                if (userSignal != -1)
-                {
-                    result = sigaction(userSignal, ref sig, out SigAction oldu);
-
-                    if (oldu.sa_handler != IntPtr.Zero)
-                    {
-                        throw new InvalidOperationException($"SIG{userSignal} is already in use.");
-                    }
-
-                    if (result != 0)
-                    {
-                        throw new InvalidOperationException($"Could not register SIG{userSignal} sigaction. Error: {result}");
+                        throw new SystemException($"Could not register SIGBUS sigaction. Error: {Marshal.GetLastPInvokeErrorMessage()}");
                     }
                 }
             }
@@ -168,6 +148,87 @@ namespace Ryujinx.Cpu.Signal
             return old;
         }
 
+        /// <summary>
+        /// Calls sigaltstack with a descriptor of <paramref name="stackSize"/> bytes starting at <paramref name="stackPtr"/>.
+        /// </summary>
+        /// <exception cref="SystemException">sigaltstack returned a non-zero result.</exception>
+        public static void RegisterAlternateStack(IntPtr stackPtr, ulong stackSize)
+        {
+            Stack altStack = new();
+            altStack.ss_sp = stackPtr;
+            altStack.ss_size = (IntPtr)stackSize;
+
+            if (sigaltstack(ref altStack, out _) != 0)
+            {
+                throw new SystemException($"Could not set alternate stack. Error: {Marshal.GetLastPInvokeErrorMessage()}");
+            }
+        }
+
+        /// <summary>
+        /// Calls sigaltstack with a descriptor whose only explicitly set field is ss_flags = SS_DISABLE.
+        /// </summary>
+        /// <exception cref="SystemException">sigaltstack returned a non-zero result.</exception>
+        public static void UnregisterAlternateStack()
+        {
+            Stack disabledStack = new();
+            disabledStack.ss_flags = SS_DISABLE;
+
+            if (sigaltstack(ref disabledStack, out _) != 0)
+            {
+                throw new SystemException($"Could not remove alternate stack. Error: {Marshal.GetLastPInvokeErrorMessage()}");
+            }
+        }
+
+        /// <summary>
+        /// Installs <paramref name="action"/> as the handler for <paramref name="sigNum"/> with SA_SIGINFO | SA_ONSTACK.
+        /// If the signal already had a non-zero handler, the displaced action is put back before the error is thrown.
+        /// </summary>
+        /// <exception cref="SystemException">sigaction failed, or the signal was already in use.</exception>
+        public static void RegisterExceptionHandler(int sigNum, IntPtr action)
+        {
+            int result;
+            bool inUse;
+
+            if (Ryujinx.Common.SystemInfo.SystemInfo.IsAndroid())
+            {
+                SigActionBionic sig = new() { sa_handler = action, sa_flags = SA_SIGINFO | SA_ONSTACK };
+
+                sigemptyset(ref sig.sa_mask);
+                result = sigaction(sigNum, ref sig, out SigActionBionic oldu);
+                inUse = result == 0 && oldu.sa_handler != IntPtr.Zero; // Old action is only meaningful on success.
+
+                if (inUse)
+                {
+                    // Best effort: give the signal back to its previous owner instead of leaving ours installed.
+                    sigaction(sigNum, ref oldu, out _);
+                }
+            }
+            else
+            {
+                SigAction sig = new() { sa_handler = action, sa_flags = SA_SIGINFO | SA_ONSTACK };
+
+                sigemptyset(ref sig.sa_mask);
+                result = sigaction(sigNum, ref sig, out SigAction oldu);
+                inUse = result == 0 && oldu.sa_handler != IntPtr.Zero; // Old action is only meaningful on success.
+
+                if (inUse)
+                {
+                    // Best effort: give the signal back to its previous owner instead of leaving ours installed.
+                    sigaction(sigNum, ref oldu, out _);
+                }
+            }
+
+            if (result != 0)
+            {
+                throw new SystemException($"Could not register SIG{sigNum} sigaction. Error: {Marshal.GetLastPInvokeErrorMessage()}");
+            }
+
+            if (inUse)
+            {
+                throw new SystemException($"SIG{sigNum} is already in use.");
+            }
+        }
+
         public static bool RestoreExceptionHandler(SigAction oldAction)
         {
             if (Ryujinx.Common.SystemInfo.SystemInfo.IsAndroid())