From a731ab3a2aad56e6ceb8b4e2444a61353246295c Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Thu, 8 Aug 2019 15:56:22 -0300
Subject: [PATCH] Add a new JIT compiler for CPU code (#693)

* Start of the ARMeilleure project

* Refactoring around the old IRAdapter, now renamed to PreAllocator

* Optimize the LowestBitSet method

* Add CLZ support and fix CLS implementation

* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks

* Implement the ByteSwap IR instruction, and some refactoring on the assembler

* Implement the DivideUI IR instruction and fix 64-bit IDIV

* Correct constant operand type on CSINC

* Move division instructions implementation to InstEmitDiv

* Fix destination type for the ConditionalSelect IR instruction

* Implement UMULH and SMULH, with new IR instructions

* Fix some issues with shift instructions

* Fix constant types for BFM instructions

* Fix up new tests using the new V128 struct

* Update tests

* Move DIV tests to a separate file

* Add support for calls, and some instructions that depend on them

* Start adding support for SIMD & FP types, along with some of the related ARM instructions

* Fix some typos and the divide instruction with FP operands

* Fix wrong method call on Clz_V

* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes

* Implement SIMD logical instructions and more misc. fixes

* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations

* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes

* Implement SIMD shift instruction and fix Dup_V

* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table

* Fix check with tolerance on tester

* Implement FP & SIMD comparison instructions, and some fixes

* Update FCVT (Scalar) encoding on the table to support the Half-float variants

* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes

* Use old memory access methods, make a start on SIMD memory instruction support, some fixes

* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes

* Fix arguments count with struct return values, other fixes

* More instructions

* Misc. fixes and integrate LDj3SNuD fixes

* Update tests

* Add a faster linear scan allocator, unwinding support on Windows, and other changes

* Update Ryujinx.HLE

* Update Ryujinx.Graphics

* Fix V128 return pointer passing, RCX is clobbered

* Update Ryujinx.Tests

* Update ITimeZoneService

* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks

* Use generic GetFunctionPointerForDelegate method and other tweaks

* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics

* Remove some unused code on the assembler

* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler

* Add hardware capability detection

* Fix regression on Sha1h and revert Fcm** changes

* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator

* Fix silly mistake introduced on last commit on CpuId

* Generate inline stack probes when the stack allocation is too large

* Initial support for the System-V ABI

* Support multiple destination operands

* Fix SSE2 VectorInsert8 path, and other fixes

* Change placement of XMM callee save and restore code to match other compilers

* Rename Dest to Destination and Inst to Instruction

* Fix a regression related to calls and the V128 type

* Add an extra space on comments to match code style

* Some refactoring

* Fix vector insert FP32 SSE2 path

* Port over the ARM32 instructions

* Avoid memory protection races on JIT Cache

* Another fix on VectorInsert FP32 (thanks to LDj3SNuD)

* Float operands don't need to use the same register when VEX is supported

* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks

* Some nits, small improvements on the pre allocator

* CpuThreadState is gone

* Allow changing CPU emulators with a config entry

* Add runtime identifiers on the ARMeilleure project

* Allow switching between CPUs through a config entry (pt. 2)

* Change win10-x64 to win-x64 on projects

* Update the Ryujinx project to use ARMeilleure

* Ensure that the selected register is valid on the hybrid allocator

* Allow exiting on returns to 0 (should fix test regression)

* Remove register assignments for most used variables on the hybrid allocator

* Do not use fixed registers as spill temp

* Add missing namespace and remove unneeded using

* Address PR feedback

* Fix types, etc

* Enable AssumeStrictAbiCompliance by default

* Ensure that Spill and Fill don't load or store any more than necessary
---
 ARMeilleure/ARMeilleure.csproj                |   20 +
 ARMeilleure/CodeGen/CompiledFunction.cs       |   17 +
 .../CodeGen/Optimizations/ConstantFolding.cs  |  258 ++
 .../CodeGen/Optimizations/Optimizer.cs        |  126 +
 .../CodeGen/Optimizations/Simplification.cs   |  157 +
 .../RegisterAllocators/AllocationResult.cs    |   19 +
 .../RegisterAllocators/CopyResolver.cs        |  246 ++
 .../RegisterAllocators/HybridAllocator.cs     |  382 ++
 .../RegisterAllocators/IRegisterAllocator.cs  |   12 +
 .../RegisterAllocators/LinearScanAllocator.cs | 1019 ++++++
 .../RegisterAllocators/LiveInterval.cs        |  390 ++
 .../CodeGen/RegisterAllocators/LiveRange.cs   |   31 +
 .../RegisterAllocators/RegisterMasks.cs       |   47 +
 .../RegisterAllocators/StackAllocator.cs      |   27 +
 ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs   |   18 +
 .../CodeGen/Unwinding/UnwindPushEntry.cs      |   20 +
 ARMeilleure/CodeGen/X86/Assembler.cs          | 1358 +++++++
 ARMeilleure/CodeGen/X86/CallConvName.cs       |    8 +
 ARMeilleure/CodeGen/X86/CallingConvention.cs  |  159 +
 ARMeilleure/CodeGen/X86/CodeGenContext.cs     |  305 ++
 ARMeilleure/CodeGen/X86/CodeGenerator.cs      | 1661 +++++++++
 .../CodeGen/X86/HardwareCapabilities.cs       |   52 +
 ARMeilleure/CodeGen/X86/IntrinsicInfo.cs      |   14 +
 ARMeilleure/CodeGen/X86/IntrinsicTable.cs     |  160 +
 ARMeilleure/CodeGen/X86/IntrinsicType.cs      |   14 +
 ARMeilleure/CodeGen/X86/PreAllocator.cs       | 1280 +++++++
 ARMeilleure/CodeGen/X86/X86Condition.cs       |   22 +
 ARMeilleure/CodeGen/X86/X86Instruction.cs     |  190 +
 ARMeilleure/CodeGen/X86/X86Register.cs        |   41 +
 ARMeilleure/Common/BitMap.cs                  |  138 +
 ARMeilleure/Common/BitUtils.cs                |  109 +
 ARMeilleure/Common/EnumUtils.cs               |   12 +
 ARMeilleure/Decoders/Block.cs                 |   99 +
 ARMeilleure/Decoders/Condition.cs             |   32 +
 ARMeilleure/Decoders/DataOp.cs                |   10 +
 ARMeilleure/Decoders/Decoder.cs               |  351 ++
 ARMeilleure/Decoders/DecoderHelper.cs         |  113 +
 ARMeilleure/Decoders/IOpCode.cs               |   17 +
 ARMeilleure/Decoders/IOpCode32.cs             |    9 +
 ARMeilleure/Decoders/IOpCode32Alu.cs          |   10 +
 ARMeilleure/Decoders/IOpCode32BImm.cs         |    4 +
 ARMeilleure/Decoders/IOpCode32BReg.cs         |    7 +
 ARMeilleure/Decoders/IOpCode32Mem.cs          |   12 +
 ARMeilleure/Decoders/IOpCode32MemMult.cs      |   13 +
 ARMeilleure/Decoders/IOpCodeAlu.cs            |   10 +
 ARMeilleure/Decoders/IOpCodeAluImm.cs         |    7 +
 ARMeilleure/Decoders/IOpCodeAluRs.cs          |   10 +
 ARMeilleure/Decoders/IOpCodeAluRx.cs          |   10 +
 ARMeilleure/Decoders/IOpCodeBImm.cs           |    7 +
 ARMeilleure/Decoders/IOpCodeCond.cs           |    7 +
 ARMeilleure/Decoders/IOpCodeLit.cs            |   11 +
 ARMeilleure/Decoders/IOpCodeSimd.cs           |    7 +
 ARMeilleure/Decoders/InstDescriptor.cs        |   18 +
 ARMeilleure/Decoders/InstEmitter.cs           |    6 +
 ARMeilleure/Decoders/IntType.cs               |   14 +
 ARMeilleure/Decoders/OpCode.cs                |   48 +
 ARMeilleure/Decoders/OpCode32.cs              |   21 +
 ARMeilleure/Decoders/OpCode32Alu.cs           |   18 +
 ARMeilleure/Decoders/OpCode32AluImm.cs        |   21 +
 ARMeilleure/Decoders/OpCode32AluRsImm.cs      |   18 +
 ARMeilleure/Decoders/OpCode32BImm.cs          |   27 +
 ARMeilleure/Decoders/OpCode32BReg.cs          |   12 +
 ARMeilleure/Decoders/OpCode32Mem.cs           |   37 +
 ARMeilleure/Decoders/OpCode32MemImm.cs        |   10 +
 ARMeilleure/Decoders/OpCode32MemImm8.cs       |   13 +
 ARMeilleure/Decoders/OpCode32MemMult.cs       |   55 +
 ARMeilleure/Decoders/OpCodeAdr.cs             |   17 +
 ARMeilleure/Decoders/OpCodeAlu.cs             |   21 +
 ARMeilleure/Decoders/OpCodeAluBinary.cs       |   12 +
 ARMeilleure/Decoders/OpCodeAluImm.cs          |   38 +
 ARMeilleure/Decoders/OpCodeAluRs.cs           |   27 +
 ARMeilleure/Decoders/OpCodeAluRx.cs           |   17 +
 ARMeilleure/Decoders/OpCodeBImm.cs            |    9 +
 ARMeilleure/Decoders/OpCodeBImmAl.cs          |   10 +
 ARMeilleure/Decoders/OpCodeBImmCmp.cs         |   18 +
 ARMeilleure/Decoders/OpCodeBImmCond.cs        |   23 +
 ARMeilleure/Decoders/OpCodeBImmTest.cs        |   18 +
 ARMeilleure/Decoders/OpCodeBReg.cs            |   22 +
 ARMeilleure/Decoders/OpCodeBfm.cs             |   27 +
 ARMeilleure/Decoders/OpCodeCcmp.cs            |   30 +
 ARMeilleure/Decoders/OpCodeCcmpImm.cs         |    9 +
 ARMeilleure/Decoders/OpCodeCcmpReg.cs         |   13 +
 ARMeilleure/Decoders/OpCodeCsel.cs            |   15 +
 ARMeilleure/Decoders/OpCodeException.cs       |   12 +
 ARMeilleure/Decoders/OpCodeMem.cs             |   17 +
 ARMeilleure/Decoders/OpCodeMemEx.cs           |   14 +
 ARMeilleure/Decoders/OpCodeMemImm.cs          |   51 +
 ARMeilleure/Decoders/OpCodeMemLit.cs          |   26 +
 ARMeilleure/Decoders/OpCodeMemPair.cs         |   23 +
 ARMeilleure/Decoders/OpCodeMemReg.cs          |   18 +
 ARMeilleure/Decoders/OpCodeMov.cs             |   36 +
 ARMeilleure/Decoders/OpCodeMul.cs             |   14 +
 ARMeilleure/Decoders/OpCodeSimd.cs            |   22 +
 ARMeilleure/Decoders/OpCodeSimdCvt.cs         |   19 +
 ARMeilleure/Decoders/OpCodeSimdExt.cs         |   12 +
 ARMeilleure/Decoders/OpCodeSimdFcond.cs       |   15 +
 ARMeilleure/Decoders/OpCodeSimdFmov.cs        |   31 +
 ARMeilleure/Decoders/OpCodeSimdImm.cs         |   98 +
 ARMeilleure/Decoders/OpCodeSimdIns.cs         |   34 +
 ARMeilleure/Decoders/OpCodeSimdMemImm.cs      |   17 +
 ARMeilleure/Decoders/OpCodeSimdMemLit.cs      |   29 +
 ARMeilleure/Decoders/OpCodeSimdMemMs.cs       |   46 +
 ARMeilleure/Decoders/OpCodeSimdMemPair.cs     |   14 +
 ARMeilleure/Decoders/OpCodeSimdMemReg.cs      |   12 +
 ARMeilleure/Decoders/OpCodeSimdMemSs.cs       |   95 +
 ARMeilleure/Decoders/OpCodeSimdReg.cs         |   16 +
 ARMeilleure/Decoders/OpCodeSimdRegElem.cs     |   29 +
 ARMeilleure/Decoders/OpCodeSimdRegElemF.cs    |   31 +
 ARMeilleure/Decoders/OpCodeSimdShImm.cs       |   16 +
 ARMeilleure/Decoders/OpCodeSimdTbl.cs         |   10 +
 ARMeilleure/Decoders/OpCodeSystem.cs          |   22 +
 ARMeilleure/Decoders/OpCodeT16.cs             |   12 +
 ARMeilleure/Decoders/OpCodeT16AluImm8.cs      |   20 +
 ARMeilleure/Decoders/OpCodeT16BReg.cs         |   12 +
 ARMeilleure/Decoders/OpCodeTable.cs           |  787 ++++
 ARMeilleure/Decoders/RegisterSize.cs          |   10 +
 ARMeilleure/Decoders/ShiftType.cs             |   10 +
 ARMeilleure/Diagnostics/IRDumper.cs           |  168 +
 ARMeilleure/Diagnostics/Logger.cs             |   59 +
 ARMeilleure/Diagnostics/PassName.cs           |   17 +
 ARMeilleure/Instructions/CryptoHelper.cs      |  279 ++
 ARMeilleure/Instructions/DelegateTypes.cs     |   78 +
 ARMeilleure/Instructions/InstEmitAlu.cs       |  369 ++
 ARMeilleure/Instructions/InstEmitAlu32.cs     |  129 +
 ARMeilleure/Instructions/InstEmitAluHelper.cs |  351 ++
 ARMeilleure/Instructions/InstEmitBfm.cs       |  196 +
 ARMeilleure/Instructions/InstEmitCcmp.cs      |   61 +
 ARMeilleure/Instructions/InstEmitCsel.cs      |   53 +
 ARMeilleure/Instructions/InstEmitDiv.cs       |   67 +
 ARMeilleure/Instructions/InstEmitException.cs |   55 +
 ARMeilleure/Instructions/InstEmitFlow.cs      |  159 +
 ARMeilleure/Instructions/InstEmitFlow32.cs    |   71 +
 .../Instructions/InstEmitFlowHelper.cs        |  192 +
 ARMeilleure/Instructions/InstEmitHash.cs      |   64 +
 ARMeilleure/Instructions/InstEmitHelper.cs    |  218 ++
 ARMeilleure/Instructions/InstEmitMemory.cs    |  177 +
 ARMeilleure/Instructions/InstEmitMemory32.cs  |  256 ++
 ARMeilleure/Instructions/InstEmitMemoryEx.cs  |  261 ++
 .../Instructions/InstEmitMemoryHelper.cs      |  512 +++
 ARMeilleure/Instructions/InstEmitMove.cs      |   41 +
 ARMeilleure/Instructions/InstEmitMul.cs       |  100 +
 .../Instructions/InstEmitSimdArithmetic.cs    | 3159 +++++++++++++++++
 ARMeilleure/Instructions/InstEmitSimdCmp.cs   |  712 ++++
 .../Instructions/InstEmitSimdCrypto.cs        |   49 +
 ARMeilleure/Instructions/InstEmitSimdCvt.cs   | 1166 ++++++
 ARMeilleure/Instructions/InstEmitSimdHash.cs  |  147 +
 .../Instructions/InstEmitSimdHelper.cs        | 1477 ++++++++
 .../Instructions/InstEmitSimdLogical.cs       |  456 +++
 .../Instructions/InstEmitSimdMemory.cs        |  160 +
 ARMeilleure/Instructions/InstEmitSimdMove.cs  |  794 +++++
 ARMeilleure/Instructions/InstEmitSimdShift.cs | 1057 ++++++
 ARMeilleure/Instructions/InstEmitSystem.cs    |  114 +
 ARMeilleure/Instructions/InstName.cs          |  459 +++
 ARMeilleure/Instructions/NativeInterface.cs   |  367 ++
 ARMeilleure/Instructions/SoftFallback.cs      | 1307 +++++++
 ARMeilleure/Instructions/SoftFloat.cs         | 2757 ++++++++++++++
 .../IntermediateRepresentation/BasicBlock.cs  |   83 +
 .../IntermediateRepresentation/Instruction.cs |   79 +
 .../IntermediateRepresentation/Intrinsic.cs   |  138 +
 .../IntrinsicOperation.cs                     |   12 +
 .../MemoryOperand.cs                          |   25 +
 .../IntermediateRepresentation/Multiplier.cs  |   10 +
 .../IntermediateRepresentation/Node.cs        |  163 +
 .../IntermediateRepresentation/Operand.cs     |  124 +
 .../OperandHelper.cs                          |   68 +
 .../IntermediateRepresentation/OperandKind.cs |   12 +
 .../IntermediateRepresentation/OperandType.cs |   51 +
 .../IntermediateRepresentation/Operation.cs   |   40 +
 .../IntermediateRepresentation/PhiNode.cs     |   22 +
 .../IntermediateRepresentation/Register.cs    |   43 +
 .../RegisterType.cs                           |    9 +
 ARMeilleure/Memory/IMemory.cs                 |   37 +
 ARMeilleure/Memory/IMemoryManager.cs          |   40 +
 ARMeilleure/Memory/MemoryHelper.cs            |   71 +
 ARMeilleure/Memory/MemoryManagement.cs        |  114 +
 ARMeilleure/Memory/MemoryManagementUnix.cs    |   71 +
 ARMeilleure/Memory/MemoryManagementWindows.cs |  156 +
 ARMeilleure/Memory/MemoryManager.cs           |  835 +++++
 ARMeilleure/Memory/MemoryManagerPal.cs        |   77 +
 ARMeilleure/Memory/MemoryProtection.cs        |   17 +
 .../Memory/MemoryProtectionException.cs       |    9 +
 ARMeilleure/Optimizations.cs                  |   33 +
 ARMeilleure/State/Aarch32Mode.cs              |   15 +
 ARMeilleure/State/ExecutionContext.cs         |  130 +
 ARMeilleure/State/ExecutionMode.cs            |    9 +
 ARMeilleure/State/FPCR.cs                     |   23 +
 ARMeilleure/State/FPException.cs              |   12 +
 ARMeilleure/State/FPRoundingMode.cs           |   10 +
 ARMeilleure/State/FPSR.cs                     |   11 +
 ARMeilleure/State/FPType.cs                   |   11 +
 ARMeilleure/State/IExecutionContext.cs        |   37 +
 ARMeilleure/State/InstExceptionEventArgs.cs   |   16 +
 ARMeilleure/State/InstUndefinedEventArgs.cs   |   16 +
 ARMeilleure/State/NativeContext.cs            |  157 +
 ARMeilleure/State/PState.cs                   |   16 +
 ARMeilleure/State/RegisterAlias.cs            |   41 +
 ARMeilleure/State/RegisterConsts.cs           |   13 +
 ARMeilleure/State/V128.cs                     |  214 ++
 ARMeilleure/Statistics.cs                     |   92 +
 ARMeilleure/Translation/ArmEmitterContext.cs  |  153 +
 ARMeilleure/Translation/Compiler.cs           |   47 +
 ARMeilleure/Translation/CompilerContext.cs    |   26 +
 ARMeilleure/Translation/CompilerOptions.cs    |   16 +
 ARMeilleure/Translation/ControlFlowGraph.cs   |  158 +
 ARMeilleure/Translation/DelegateCache.cs      |   26 +
 ARMeilleure/Translation/Dominance.cs          |   95 +
 ARMeilleure/Translation/EmitterContext.cs     |  562 +++
 ARMeilleure/Translation/GuestFunction.cs      |    6 +
 ARMeilleure/Translation/ITranslator.cs        |    9 +
 ARMeilleure/Translation/JitCache.cs           |  135 +
 ARMeilleure/Translation/JitCacheEntry.cs      |   19 +
 ARMeilleure/Translation/JitUnwindWindows.cs   |  164 +
 ARMeilleure/Translation/PriorityQueue.cs      |   39 +
 ARMeilleure/Translation/RegisterToLocal.cs    |   52 +
 ARMeilleure/Translation/RegisterUsage.cs      |  413 +++
 ARMeilleure/Translation/SsaConstruction.cs    |  293 ++
 ARMeilleure/Translation/SsaDeconstruction.cs  |   46 +
 ARMeilleure/Translation/TranslatedFunction.cs |   30 +
 ARMeilleure/Translation/Translator.cs         |  253 ++
 ChocolArm64/ChocolArm64.csproj                |    3 +-
 ChocolArm64/CpuThread.cs                      |   66 -
 .../Instructions/InstEmitMemoryHelper.cs      |   10 +-
 ChocolArm64/Instructions/InstEmitSystem.cs    |    8 +-
 ChocolArm64/Instructions/SoftFloat.cs         |  112 +-
 ChocolArm64/Memory/MemoryManager.cs           |   10 +-
 ChocolArm64/Optimizations.cs                  |   35 +-
 ChocolArm64/State/CpuThreadState.cs           |  187 +-
 ChocolArm64/Translation/Translator.cs         |   23 +-
 Ryujinx.Audio/Ryujinx.Audio.csproj            |    2 +-
 Ryujinx.Common/Ryujinx.Common.csproj          |    4 +-
 .../Graphics3d/Texture/ImageUtils.cs          |    7 +-
 .../Graphics3d/Texture/TextureHelper.cs       |    6 +-
 Ryujinx.Graphics/Memory/NvGpuVmm.cs           |    6 +-
 Ryujinx.Graphics/Memory/NvGpuVmmCache.cs      |    6 +-
 Ryujinx.Graphics/Ryujinx.Graphics.csproj      |    5 +-
 Ryujinx.Graphics/VDec/VideoDecoder.cs         |    2 +-
 Ryujinx.HLE/DeviceMemory.cs                   |    2 +-
 .../UndefinedInstructionException.cs          |    2 +-
 Ryujinx.HLE/HOS/Homebrew.cs                   |    8 +-
 Ryujinx.HLE/HOS/Horizon.cs                    |    2 +
 Ryujinx.HLE/HOS/Ipc/IpcHandler.cs             |    4 +-
 .../HOS/Kernel/Common/KernelTransfer.cs       |    2 +-
 .../HOS/Kernel/Memory/KMemoryManager.cs       |    6 +-
 .../HOS/Kernel/Process/HleProcessDebugger.cs  |   12 +-
 Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs    |   48 +-
 .../HOS/Kernel/SupervisorCall/SvcHandler.cs   |   15 +-
 .../HOS/Kernel/SupervisorCall/SvcIpc.cs       |    2 +-
 .../HOS/Kernel/SupervisorCall/SvcSystem.cs    |    4 +-
 .../HOS/Kernel/SupervisorCall/SvcTable.cs     |   59 +-
 .../HOS/Kernel/SupervisorCall/SvcThread.cs    |  149 +-
 .../HOS/Kernel/Threading/HleScheduler.cs      |   18 +-
 .../HOS/Kernel/Threading/KCoreContext.cs      |    6 +-
 .../HOS/Kernel/Threading/KCriticalSection.cs  |   10 +-
 .../HOS/Kernel/Threading/KScheduler.cs        |    2 +-
 Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs   |   72 +-
 Ryujinx.HLE/HOS/ProgramLoader.cs              |    2 +-
 Ryujinx.HLE/HOS/ServiceCtx.cs                 |    6 +-
 Ryujinx.HLE/HOS/Services/Acc/IProfile.cs      |    2 +-
 .../HOS/Services/Aud/AudioOut/IAudioOut.cs    |    2 +-
 .../Aud/AudioRenderer/IAudioRenderer.cs       |    6 +-
 .../Aud/AudioRenderer/VoiceContext.cs         |    6 +-
 .../HOS/Services/Aud/IAudioOutManager.cs      |    2 +-
 Ryujinx.HLE/HOS/Services/Ldr/IRoInterface.cs  |    2 +-
 Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs |    2 +-
 .../HOS/Services/Nv/NvGpuAS/NvGpuASIoctl.cs   |    2 +-
 .../HOS/Services/Nv/NvGpuGpu/NvGpuGpuIoctl.cs |    2 +-
 .../Nv/NvHostChannel/NvHostChannelIoctl.cs    |    2 +-
 .../Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs |    2 +-
 .../HOS/Services/Nv/NvMap/NvMapIoctl.cs       |    2 +-
 .../Time/Clock/StandardSteadyClockCore.cs     |    2 +-
 .../Time/Clock/TickBasedSteadyClockCore.cs    |    2 +-
 .../HOS/Services/Time/IStaticService.cs       |    2 +-
 .../HOS/Services/Time/ITimeZoneService.cs     |    4 +-
 .../Services/Vi/IApplicationDisplayService.cs |    2 +-
 Ryujinx.HLE/Ryujinx.HLE.csproj                |    6 +-
 Ryujinx.HLE/Utilities/StructReader.cs         |    6 +-
 Ryujinx.HLE/Utilities/StructWriter.cs         |    6 +-
 Ryujinx.LLE/Luea.csproj                       |    2 +-
 Ryujinx.Profiler/Ryujinx.Profiler.csproj      |    2 +-
 .../Ryujinx.ShaderTools.csproj                |    2 +-
 Ryujinx.Tests.Unicorn/IndexedProperty.cs      |   14 +-
 Ryujinx.Tests.Unicorn/Native/Interface.cs     |    4 +-
 Ryujinx.Tests.Unicorn/Native/UnicornArch.cs   |    2 +-
 Ryujinx.Tests.Unicorn/Native/UnicornMode.cs   |    2 +-
 .../Ryujinx.Tests.Unicorn.csproj              |    3 +-
 Ryujinx.Tests.Unicorn/SimdValue.cs            |  112 +
 Ryujinx.Tests.Unicorn/UnicornAArch64.cs       |  204 +-
 Ryujinx.Tests/Cpu/CpuTest.cs                  |  478 ++-
 Ryujinx.Tests/Cpu/CpuTestAluBinary.cs         |  238 ++
 Ryujinx.Tests/Cpu/CpuTestAluRs.cs             |  224 --
 Ryujinx.Tests/Cpu/CpuTestMisc.cs              |   82 +-
 Ryujinx.Tests/Cpu/CpuTestSimd.cs              |  491 +--
 Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs        |   64 +-
 Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs           |   35 +-
 Ryujinx.Tests/Cpu/CpuTestSimdExt.cs           |   16 +-
 Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs         |   23 +-
 Ryujinx.Tests/Cpu/CpuTestSimdFmov.cs          |    8 +-
 Ryujinx.Tests/Cpu/CpuTestSimdImm.cs           |   19 +-
 Ryujinx.Tests/Cpu/CpuTestSimdIns.cs           |   82 +-
 Ryujinx.Tests/Cpu/CpuTestSimdReg.cs           |  933 ++---
 Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs       |   28 +-
 Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs      |   51 +-
 Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs         |  107 +-
 Ryujinx.Tests/Cpu/CpuTestSimdTbl.cs           |   69 +-
 Ryujinx.Tests/Ryujinx.Tests.csproj            |    5 +-
 Ryujinx.sln                                   |    6 +-
 Ryujinx/Config.jsonc                          |    6 +-
 Ryujinx/Configuration.cs                      |   10 +-
 Ryujinx/Ryujinx.csproj                        |    4 +-
 Ryujinx/_schema.json                          |   14 +-
 310 files changed, 37389 insertions(+), 2086 deletions(-)
 create mode 100644 ARMeilleure/ARMeilleure.csproj
 create mode 100644 ARMeilleure/CodeGen/CompiledFunction.cs
 create mode 100644 ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
 create mode 100644 ARMeilleure/CodeGen/Optimizations/Optimizer.cs
 create mode 100644 ARMeilleure/CodeGen/Optimizations/Simplification.cs
 create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs
 create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs
 create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
 create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
 create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
 create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
 create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
 create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
 create mode 100644 ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
 create mode 100644 ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
 create mode 100644 ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
 create mode 100644 ARMeilleure/CodeGen/X86/Assembler.cs
 create mode 100644 ARMeilleure/CodeGen/X86/CallConvName.cs
 create mode 100644 ARMeilleure/CodeGen/X86/CallingConvention.cs
 create mode 100644 ARMeilleure/CodeGen/X86/CodeGenContext.cs
 create mode 100644 ARMeilleure/CodeGen/X86/CodeGenerator.cs
 create mode 100644 ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
 create mode 100644 ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
 create mode 100644 ARMeilleure/CodeGen/X86/IntrinsicTable.cs
 create mode 100644 ARMeilleure/CodeGen/X86/IntrinsicType.cs
 create mode 100644 ARMeilleure/CodeGen/X86/PreAllocator.cs
 create mode 100644 ARMeilleure/CodeGen/X86/X86Condition.cs
 create mode 100644 ARMeilleure/CodeGen/X86/X86Instruction.cs
 create mode 100644 ARMeilleure/CodeGen/X86/X86Register.cs
 create mode 100644 ARMeilleure/Common/BitMap.cs
 create mode 100644 ARMeilleure/Common/BitUtils.cs
 create mode 100644 ARMeilleure/Common/EnumUtils.cs
 create mode 100644 ARMeilleure/Decoders/Block.cs
 create mode 100644 ARMeilleure/Decoders/Condition.cs
 create mode 100644 ARMeilleure/Decoders/DataOp.cs
 create mode 100644 ARMeilleure/Decoders/Decoder.cs
 create mode 100644 ARMeilleure/Decoders/DecoderHelper.cs
 create mode 100644 ARMeilleure/Decoders/IOpCode.cs
 create mode 100644 ARMeilleure/Decoders/IOpCode32.cs
 create mode 100644 ARMeilleure/Decoders/IOpCode32Alu.cs
 create mode 100644 ARMeilleure/Decoders/IOpCode32BImm.cs
 create mode 100644 ARMeilleure/Decoders/IOpCode32BReg.cs
 create mode 100644 ARMeilleure/Decoders/IOpCode32Mem.cs
 create mode 100644 ARMeilleure/Decoders/IOpCode32MemMult.cs
 create mode 100644 ARMeilleure/Decoders/IOpCodeAlu.cs
 create mode 100644 ARMeilleure/Decoders/IOpCodeAluImm.cs
 create mode 100644 ARMeilleure/Decoders/IOpCodeAluRs.cs
 create mode 100644 ARMeilleure/Decoders/IOpCodeAluRx.cs
 create mode 100644 ARMeilleure/Decoders/IOpCodeBImm.cs
 create mode 100644 ARMeilleure/Decoders/IOpCodeCond.cs
 create mode 100644 ARMeilleure/Decoders/IOpCodeLit.cs
 create mode 100644 ARMeilleure/Decoders/IOpCodeSimd.cs
 create mode 100644 ARMeilleure/Decoders/InstDescriptor.cs
 create mode 100644 ARMeilleure/Decoders/InstEmitter.cs
 create mode 100644 ARMeilleure/Decoders/IntType.cs
 create mode 100644 ARMeilleure/Decoders/OpCode.cs
 create mode 100644 ARMeilleure/Decoders/OpCode32.cs
 create mode 100644 ARMeilleure/Decoders/OpCode32Alu.cs
 create mode 100644 ARMeilleure/Decoders/OpCode32AluImm.cs
 create mode 100644 ARMeilleure/Decoders/OpCode32AluRsImm.cs
 create mode 100644 ARMeilleure/Decoders/OpCode32BImm.cs
 create mode 100644 ARMeilleure/Decoders/OpCode32BReg.cs
 create mode 100644 ARMeilleure/Decoders/OpCode32Mem.cs
 create mode 100644 ARMeilleure/Decoders/OpCode32MemImm.cs
 create mode 100644 ARMeilleure/Decoders/OpCode32MemImm8.cs
 create mode 100644 ARMeilleure/Decoders/OpCode32MemMult.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeAdr.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeAlu.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeAluBinary.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeAluImm.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeAluRs.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeAluRx.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeBImm.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeBImmAl.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeBImmCmp.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeBImmCond.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeBImmTest.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeBReg.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeBfm.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeCcmp.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeCcmpImm.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeCcmpReg.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeCsel.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeException.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeMem.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeMemEx.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeMemImm.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeMemLit.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeMemPair.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeMemReg.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeMov.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeMul.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimd.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdCvt.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdExt.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdFcond.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdFmov.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdImm.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdIns.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdMemImm.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdMemLit.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdMemMs.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdMemPair.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdMemReg.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdMemSs.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdReg.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdRegElem.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdRegElemF.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdShImm.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSimdTbl.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeSystem.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeT16.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeT16AluImm8.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeT16BReg.cs
 create mode 100644 ARMeilleure/Decoders/OpCodeTable.cs
 create mode 100644 ARMeilleure/Decoders/RegisterSize.cs
 create mode 100644 ARMeilleure/Decoders/ShiftType.cs
 create mode 100644 ARMeilleure/Diagnostics/IRDumper.cs
 create mode 100644 ARMeilleure/Diagnostics/Logger.cs
 create mode 100644 ARMeilleure/Diagnostics/PassName.cs
 create mode 100644 ARMeilleure/Instructions/CryptoHelper.cs
 create mode 100644 ARMeilleure/Instructions/DelegateTypes.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitAlu.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitAlu32.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitAluHelper.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitBfm.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitCcmp.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitCsel.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitDiv.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitException.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitFlow.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitFlow32.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitFlowHelper.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitHash.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitHelper.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitMemory.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitMemory32.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitMemoryEx.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitMemoryHelper.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitMove.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitMul.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitSimdCmp.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitSimdCrypto.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitSimdCvt.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitSimdHash.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitSimdHelper.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitSimdLogical.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitSimdMemory.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitSimdMove.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitSimdShift.cs
 create mode 100644 ARMeilleure/Instructions/InstEmitSystem.cs
 create mode 100644 ARMeilleure/Instructions/InstName.cs
 create mode 100644 ARMeilleure/Instructions/NativeInterface.cs
 create mode 100644 ARMeilleure/Instructions/SoftFallback.cs
 create mode 100644 ARMeilleure/Instructions/SoftFloat.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/BasicBlock.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/Instruction.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/Intrinsic.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/IntrinsicOperation.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/MemoryOperand.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/Multiplier.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/Node.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/Operand.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/OperandHelper.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/OperandKind.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/OperandType.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/Operation.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/PhiNode.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/Register.cs
 create mode 100644 ARMeilleure/IntermediateRepresentation/RegisterType.cs
 create mode 100644 ARMeilleure/Memory/IMemory.cs
 create mode 100644 ARMeilleure/Memory/IMemoryManager.cs
 create mode 100644 ARMeilleure/Memory/MemoryHelper.cs
 create mode 100644 ARMeilleure/Memory/MemoryManagement.cs
 create mode 100644 ARMeilleure/Memory/MemoryManagementUnix.cs
 create mode 100644 ARMeilleure/Memory/MemoryManagementWindows.cs
 create mode 100644 ARMeilleure/Memory/MemoryManager.cs
 create mode 100644 ARMeilleure/Memory/MemoryManagerPal.cs
 create mode 100644 ARMeilleure/Memory/MemoryProtection.cs
 create mode 100644 ARMeilleure/Memory/MemoryProtectionException.cs
 create mode 100644 ARMeilleure/Optimizations.cs
 create mode 100644 ARMeilleure/State/Aarch32Mode.cs
 create mode 100644 ARMeilleure/State/ExecutionContext.cs
 create mode 100644 ARMeilleure/State/ExecutionMode.cs
 create mode 100644 ARMeilleure/State/FPCR.cs
 create mode 100644 ARMeilleure/State/FPException.cs
 create mode 100644 ARMeilleure/State/FPRoundingMode.cs
 create mode 100644 ARMeilleure/State/FPSR.cs
 create mode 100644 ARMeilleure/State/FPType.cs
 create mode 100644 ARMeilleure/State/IExecutionContext.cs
 create mode 100644 ARMeilleure/State/InstExceptionEventArgs.cs
 create mode 100644 ARMeilleure/State/InstUndefinedEventArgs.cs
 create mode 100644 ARMeilleure/State/NativeContext.cs
 create mode 100644 ARMeilleure/State/PState.cs
 create mode 100644 ARMeilleure/State/RegisterAlias.cs
 create mode 100644 ARMeilleure/State/RegisterConsts.cs
 create mode 100644 ARMeilleure/State/V128.cs
 create mode 100644 ARMeilleure/Statistics.cs
 create mode 100644 ARMeilleure/Translation/ArmEmitterContext.cs
 create mode 100644 ARMeilleure/Translation/Compiler.cs
 create mode 100644 ARMeilleure/Translation/CompilerContext.cs
 create mode 100644 ARMeilleure/Translation/CompilerOptions.cs
 create mode 100644 ARMeilleure/Translation/ControlFlowGraph.cs
 create mode 100644 ARMeilleure/Translation/DelegateCache.cs
 create mode 100644 ARMeilleure/Translation/Dominance.cs
 create mode 100644 ARMeilleure/Translation/EmitterContext.cs
 create mode 100644 ARMeilleure/Translation/GuestFunction.cs
 create mode 100644 ARMeilleure/Translation/ITranslator.cs
 create mode 100644 ARMeilleure/Translation/JitCache.cs
 create mode 100644 ARMeilleure/Translation/JitCacheEntry.cs
 create mode 100644 ARMeilleure/Translation/JitUnwindWindows.cs
 create mode 100644 ARMeilleure/Translation/PriorityQueue.cs
 create mode 100644 ARMeilleure/Translation/RegisterToLocal.cs
 create mode 100644 ARMeilleure/Translation/RegisterUsage.cs
 create mode 100644 ARMeilleure/Translation/SsaConstruction.cs
 create mode 100644 ARMeilleure/Translation/SsaDeconstruction.cs
 create mode 100644 ARMeilleure/Translation/TranslatedFunction.cs
 create mode 100644 ARMeilleure/Translation/Translator.cs
 delete mode 100644 ChocolArm64/CpuThread.cs
 create mode 100644 Ryujinx.Tests.Unicorn/SimdValue.cs
 create mode 100644 Ryujinx.Tests/Cpu/CpuTestAluBinary.cs

diff --git a/ARMeilleure/ARMeilleure.csproj b/ARMeilleure/ARMeilleure.csproj
new file mode 100644
index 000000000..9268dcbee
--- /dev/null
+++ b/ARMeilleure/ARMeilleure.csproj
@@ -0,0 +1,15 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>netcoreapp2.1</TargetFramework>
+    <RuntimeIdentifiers>win-x64;osx-x64;linux-x64</RuntimeIdentifiers>
+    <!-- Unsafe code is enabled unconditionally so that every Configuration|Platform
+         pair gets it, not only Debug|AnyCPU and Release|AnyCPU. -->
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Mono.Posix.NETStandard" Version="1.0.0" />
+  </ItemGroup>
+
+</Project>
diff --git a/ARMeilleure/CodeGen/CompiledFunction.cs b/ARMeilleure/CodeGen/CompiledFunction.cs
new file mode 100644
index 000000000..61e89c240
--- /dev/null
+++ b/ARMeilleure/CodeGen/CompiledFunction.cs
@@ -0,0 +1,30 @@
+using ARMeilleure.CodeGen.Unwinding;
+
+namespace ARMeilleure.CodeGen
+{
+    /// <summary>
+    /// Read-only pair made of a code byte array and its associated unwind information.
+    /// </summary>
+    struct CompiledFunction
+    {
+        private readonly byte[]     _code;
+        private readonly UnwindInfo _unwindInfo;
+
+        /// <summary>Byte array given to the constructor; the reference is stored, not a copy.</summary>
+        public byte[] Code => _code;
+
+        /// <summary>Unwind information given to the constructor.</summary>
+        public UnwindInfo UnwindInfo => _unwindInfo;
+
+        /// <summary>
+        /// Creates a new instance holding the supplied values.
+        /// </summary>
+        /// <param name="code">Value exposed through the Code property</param>
+        /// <param name="unwindInfo">Value exposed through the UnwindInfo property</param>
+        public CompiledFunction(byte[] code, UnwindInfo unwindInfo)
+        {
+            _unwindInfo = unwindInfo;
+            _code       = code;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
new file mode 100644
index 000000000..84eedee0e
--- /dev/null
+++ b/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
@@ -0,0 +1,205 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+    /// <summary>
+    /// Folds I32 and I64 operations whose sources are all constants into a copy of the computed constant.
+    /// </summary>
+    static class ConstantFolding
+    {
+        /// <summary>
+        /// Evaluates the operation when it has a destination, at least one source, and only constant sources.
+        /// A supported instruction with an I32 or I64 destination is turned into a copy of the result;
+        /// anything else is left untouched.
+        /// </summary>
+        /// <param name="operation">Operation to fold in place</param>
+        public static void RunPass(Operation operation)
+        {
+            if (operation.Destination == null || operation.SourcesCount == 0)
+            {
+                return;
+            }
+
+            if (!AreAllSourcesConstant(operation))
+            {
+                return;
+            }
+
+            switch (operation.Instruction)
+            {
+                case Instruction.Add:
+                    EvaluateBinary(operation, (x, y) => x + y, (x, y) => x + y);
+                    break;
+
+                case Instruction.BitwiseAnd:
+                    EvaluateBinary(operation, (x, y) => x & y, (x, y) => x & y);
+                    break;
+
+                case Instruction.BitwiseExclusiveOr:
+                    EvaluateBinary(operation, (x, y) => x ^ y, (x, y) => x ^ y);
+                    break;
+
+                case Instruction.BitwiseNot:
+                    EvaluateUnary(operation, (x) => ~x, (x) => ~x);
+                    break;
+
+                case Instruction.BitwiseOr:
+                    EvaluateBinary(operation, (x, y) => x | y, (x, y) => x | y);
+                    break;
+
+                case Instruction.Copy:
+                    EvaluateUnary(operation, (x) => x, (x) => x);
+                    break;
+
+                case Instruction.Divide:
+                    EvaluateBinary(operation, DivideI32, DivideI64);
+                    break;
+
+                case Instruction.DivideUI:
+                    EvaluateBinary(
+                        operation,
+                        (x, y) => y != 0 ? (int)((uint)x / (uint)y) : 0,
+                        (x, y) => y != 0 ? (long)((ulong)x / (ulong)y) : 0);
+                    break;
+
+                case Instruction.Multiply:
+                    EvaluateBinary(operation, (x, y) => x * y, (x, y) => x * y);
+                    break;
+
+                case Instruction.Negate:
+                    EvaluateUnary(operation, (x) => -x, (x) => -x);
+                    break;
+
+                case Instruction.ShiftLeft:
+                    EvaluateBinary(operation, (x, y) => x << y, (x, y) => x << (int)y);
+                    break;
+
+                case Instruction.ShiftRightSI:
+                    EvaluateBinary(operation, (x, y) => x >> y, (x, y) => x >> (int)y);
+                    break;
+
+                case Instruction.ShiftRightUI:
+                    EvaluateBinary(operation, (x, y) => (int)((uint)x >> y), (x, y) => (long)((ulong)x >> (int)y));
+                    break;
+
+                case Instruction.SignExtend16:
+                    EvaluateUnary(operation, (x) => (short)x, (x) => (short)x);
+                    break;
+
+                case Instruction.SignExtend32:
+                    EvaluateUnary(operation, (x) => x, (x) => (int)x);
+                    break;
+
+                case Instruction.SignExtend8:
+                    EvaluateUnary(operation, (x) => (sbyte)x, (x) => (sbyte)x);
+                    break;
+
+                case Instruction.Subtract:
+                    EvaluateBinary(operation, (x, y) => x - y, (x, y) => x - y);
+                    break;
+            }
+        }
+
+        private static bool AreAllSourcesConstant(Operation operation)
+        {
+            for (int index = 0; index < operation.SourcesCount; index++)
+            {
+                if (operation.GetSource(index).Kind != OperandKind.Constant)
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        // Signed division that cannot throw: a zero divisor folds to 0, and a divisor of -1 is folded
+        // as a wrapping negation, since int.MinValue / -1 overflows and throws on .NET.
+        private static int DivideI32(int x, int y)
+        {
+            if (y == 0)
+            {
+                return 0;
+            }
+
+            return y == -1 ? unchecked(-x) : x / y;
+        }
+
+        // 64-bit counterpart of DivideI32, with the same handling of 0 and -1 divisors.
+        private static long DivideI64(long x, long y)
+        {
+            if (y == 0)
+            {
+                return 0;
+            }
+
+            return y == -1 ? unchecked(-x) : x / y;
+        }
+
+        // Picks the 32-bit or 64-bit evaluator from the destination type; other types are ignored.
+        private static void EvaluateUnary(Operation operation, Func<int, int> opI32, Func<long, long> opI64)
+        {
+            OperandType type = operation.Destination.Type;
+
+            if (type == OperandType.I32)
+            {
+                EvaluateUnaryI32(operation, opI32);
+            }
+            else if (type == OperandType.I64)
+            {
+                EvaluateUnaryI64(operation, opI64);
+            }
+        }
+
+        // Picks the 32-bit or 64-bit evaluator from the destination type; other types are ignored.
+        private static void EvaluateBinary(
+            Operation              operation,
+            Func<int, int, int>    opI32,
+            Func<long, long, long> opI64)
+        {
+            OperandType type = operation.Destination.Type;
+
+            if (type == OperandType.I32)
+            {
+                EvaluateBinaryI32(operation, opI32);
+            }
+            else if (type == OperandType.I64)
+            {
+                EvaluateBinaryI64(operation, opI64);
+            }
+        }
+
+        private static void EvaluateUnaryI32(Operation operation, Func<int, int> op)
+        {
+            int x = operation.GetSource(0).AsInt32();
+
+            operation.TurnIntoCopy(Const(op(x)));
+        }
+
+        private static void EvaluateUnaryI64(Operation operation, Func<long, long> op)
+        {
+            long x = operation.GetSource(0).AsInt64();
+
+            operation.TurnIntoCopy(Const(op(x)));
+        }
+
+        private static void EvaluateBinaryI32(Operation operation, Func<int, int, int> op)
+        {
+            int x = operation.GetSource(0).AsInt32();
+            int y = operation.GetSource(1).AsInt32();
+
+            operation.TurnIntoCopy(Const(op(x, y)));
+        }
+
+        private static void EvaluateBinaryI64(Operation operation, Func<long, long, long> op)
+        {
+            long x = operation.GetSource(0).AsInt64();
+            long y = operation.GetSource(1).AsInt64();
+
+            operation.TurnIntoCopy(Const(op(x, y)));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
new file mode 100644
index 000000000..c01a8f1e7
--- /dev/null
+++ b/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
@@ -0,0 +1,141 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+    /// <summary>
+    /// Applies constant folding, simplification, copy propagation and unused node removal to every
+    /// block of a control flow graph, sweeping again until a whole sweep changes nothing.
+    /// </summary>
+    static class Optimizer
+    {
+        public static void RunPass(ControlFlowGraph cfg)
+        {
+            bool changed;
+
+            do
+            {
+                changed = false;
+
+                foreach (BasicBlock block in cfg.Blocks)
+                {
+                    LinkedListNode<Node> next;
+
+                    for (LinkedListNode<Node> current = block.Operations.First; current != null; current = next)
+                    {
+                        // Fetch the successor first, "current" may be unlinked from the list below.
+                        next = current.Next;
+
+                        changed |= OptimizeNode(block, current);
+                    }
+                }
+            }
+            while (changed);
+        }
+
+        // Optimizes a single node, returns true when the node ended up being removed from the block.
+        private static bool OptimizeNode(BasicBlock block, LinkedListNode<Node> llNode)
+        {
+            if (IsUnused(llNode.Value))
+            {
+                RemoveNode(block, llNode);
+
+                return true;
+            }
+
+            if (!(llNode.Value is Operation operation))
+            {
+                return false;
+            }
+
+            ConstantFolding.RunPass(operation);
+
+            Simplification.RunPass(operation);
+
+            if (!DestIsLocalVar(operation) || !IsPropagableCopy(operation))
+            {
+                return false;
+            }
+
+            PropagateCopy(operation);
+
+            RemoveNode(block, llNode);
+
+            return true;
+        }
+
+        private static void PropagateCopy(Operation copyOp)
+        {
+            // Every node reading the copy destination is redirected to read the copy source instead.
+            Operand replaced    = copyOp.Destination;
+            Operand replacement = copyOp.GetSource(0);
+
+            foreach (Node user in replaced.Uses.ToArray())
+            {
+                for (int srcIndex = 0; srcIndex < user.SourcesCount; srcIndex++)
+                {
+                    if (user.GetSource(srcIndex) == replaced)
+                    {
+                        user.SetSource(srcIndex, replacement);
+                    }
+                }
+            }
+        }
+
+        private static void RemoveNode(BasicBlock block, LinkedListNode<Node> llNode)
+        {
+            // Unlink the node from the block, then clear its sources so that it also
+            // disappears from the use lists of the operands it was reading.
+            block.Operations.Remove(llNode);
+
+            Node removed = llNode.Value;
+
+            for (int srcIndex = 0; srcIndex < removed.SourcesCount; srcIndex++)
+            {
+                removed.SetSource(srcIndex, null);
+            }
+
+            Debug.Assert(removed.Destination == null || removed.Destination.Uses.Count == 0);
+
+            removed.Destination = null;
+        }
+
+        private static bool IsUnused(Node node)
+        {
+            if (!DestIsLocalVar(node) || node.Destination.Uses.Count != 0)
+            {
+                return false;
+            }
+
+            return !HasSideEffects(node);
+        }
+
+        private static bool DestIsLocalVar(Node node)
+        {
+            Operand dest = node.Destination;
+
+            return dest != null && dest.Kind == OperandKind.LocalVariable;
+        }
+
+        private static bool HasSideEffects(Node node)
+        {
+            // Calls are the only nodes treated as having side effects here.
+            if (node is Operation operation)
+            {
+                return operation.Instruction == Instruction.Call;
+            }
+
+            return false;
+        }
+
+        private static bool IsPropagableCopy(Operation operation)
+        {
+            // Only copies that keep the operand type can be replaced by their source.
+            return operation.Instruction == Instruction.Copy &&
+                   operation.Destination.Type == operation.GetSource(0).Type;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Optimizations/Simplification.cs b/ARMeilleure/CodeGen/Optimizations/Simplification.cs
new file mode 100644
index 000000000..cafc025ca
--- /dev/null
+++ b/ARMeilleure/CodeGen/Optimizations/Simplification.cs
@@ -0,0 +1,188 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+    /// <summary>
+    /// Turns operations that have a constant identity or absorbing operand into plain copies.
+    /// </summary>
+    static class Simplification
+    {
+        /// <summary>
+        /// Simplifies the operation in place when one of the recognized patterns matches.
+        /// </summary>
+        /// <param name="operation">Operation to simplify</param>
+        public static void RunPass(Operation operation)
+        {
+            switch (operation.Instruction)
+            {
+                case Instruction.Add:
+                case Instruction.BitwiseExclusiveOr:
+                    TryEliminateBinaryOpCommutative(operation, 0);
+                    break;
+
+                case Instruction.BitwiseAnd:
+                    TryEliminateBitwiseAnd(operation);
+                    break;
+
+                case Instruction.BitwiseOr:
+                    TryEliminateBitwiseOr(operation);
+                    break;
+
+                case Instruction.ConditionalSelect:
+                    TryEliminateConditionalSelect(operation);
+                    break;
+
+                case Instruction.Divide:
+                    TryEliminateBinaryOpY(operation, 1);
+                    break;
+
+                case Instruction.Multiply:
+                    TryEliminateBinaryOpCommutative(operation, 1);
+                    break;
+
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+                case Instruction.Subtract:
+                    TryEliminateBinaryOpY(operation, 0);
+                    break;
+            }
+        }
+
+        private static void TryEliminateBitwiseAnd(Operation operation)
+        {
+            // Try to recognize and optimize those 4 patterns (in order):
+            // 0xFFFFFFFF & y == y,          x & 0xFFFFFFFF == x,
+            // 0x00000000 & y == 0x00000000, x & 0x00000000 == 0x00000000
+            Operand x = operation.GetSource(0);
+            Operand y = operation.GetSource(1);
+
+            if (IsConstAllOnes(x))
+            {
+                operation.TurnIntoCopy(y);
+            }
+            else if (IsConstAllOnes(y))
+            {
+                operation.TurnIntoCopy(x);
+            }
+            else if (IsConstEqual(x, 0))
+            {
+                // Reuse the zero operand itself so that the copy source keeps the operand type,
+                // Const(0) binds to the int overload and would not match an I64 operation.
+                operation.TurnIntoCopy(x);
+            }
+            else if (IsConstEqual(y, 0))
+            {
+                operation.TurnIntoCopy(y);
+            }
+        }
+
+        private static void TryEliminateBitwiseOr(Operation operation)
+        {
+            // Try to recognize and optimize those 4 patterns (in order):
+            // 0x00000000 | y == y,          x | 0x00000000 == x,
+            // 0xFFFFFFFF | y == 0xFFFFFFFF, x | 0xFFFFFFFF == 0xFFFFFFFF
+            Operand x = operation.GetSource(0);
+            Operand y = operation.GetSource(1);
+
+            if (IsConstEqual(x, 0))
+            {
+                operation.TurnIntoCopy(y);
+            }
+            else if (IsConstEqual(y, 0))
+            {
+                operation.TurnIntoCopy(x);
+            }
+            else if (IsConstAllOnes(x))
+            {
+                // Reuse the all ones operand itself so that the copy source keeps the operand type,
+                // Const(AllOnes(...)) binds to the ulong overload and would not match an I32 operation.
+                operation.TurnIntoCopy(x);
+            }
+            else if (IsConstAllOnes(y))
+            {
+                operation.TurnIntoCopy(y);
+            }
+        }
+
+        private static void TryEliminateBinaryOpY(Operation operation, ulong comparand)
+        {
+            Operand x = operation.GetSource(0);
+            Operand y = operation.GetSource(1);
+
+            if (IsConstEqual(y, comparand))
+            {
+                operation.TurnIntoCopy(x);
+            }
+        }
+
+        private static void TryEliminateBinaryOpCommutative(Operation operation, ulong comparand)
+        {
+            Operand x = operation.GetSource(0);
+            Operand y = operation.GetSource(1);
+
+            if (IsConstEqual(x, comparand))
+            {
+                operation.TurnIntoCopy(y);
+            }
+            else if (IsConstEqual(y, comparand))
+            {
+                operation.TurnIntoCopy(x);
+            }
+        }
+
+        private static void TryEliminateConditionalSelect(Operation operation)
+        {
+            Operand cond = operation.GetSource(0);
+
+            if (cond.Kind != OperandKind.Constant)
+            {
+                return;
+            }
+
+            // The condition is constant, we can turn it into a copy, and select
+            // the source based on the condition value.
+            int srcIndex = cond.Value != 0 ? 1 : 2;
+
+            Operand source = operation.GetSource(srcIndex);
+
+            operation.TurnIntoCopy(source);
+        }
+
+        private static bool IsConstEqual(Operand operand, ulong comparand)
+        {
+            if (operand.Kind != OperandKind.Constant || !operand.Type.IsInteger())
+            {
+                return false;
+            }
+
+            return operand.Value == comparand;
+        }
+
+        // True for an integer constant with all bits of its type set. The kind and type are
+        // checked before calling AllOnes, which throws for the types it does not know about.
+        private static bool IsConstAllOnes(Operand operand)
+        {
+            if (operand.Kind != OperandKind.Constant || !operand.Type.IsInteger())
+            {
+                return false;
+            }
+
+            return operand.Value == AllOnes(operand.Type);
+        }
+
+        private static ulong AllOnes(OperandType type)
+        {
+            switch (type)
+            {
+                case OperandType.I32: return ~0U;
+                case OperandType.I64: return ~0UL;
+            }
+
+            throw new ArgumentException("Invalid operand type \"" + type + "\".");
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs b/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs
new file mode 100644
index 000000000..94ac6991b
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs
@@ -0,0 +1,28 @@
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+    /// <summary>
+    /// Read-only bundle of the three integers handed to the constructor.
+    /// </summary>
+    struct AllocationResult
+    {
+        private readonly int _intUsedRegisters;
+        private readonly int _vecUsedRegisters;
+        private readonly int _spillRegionSize;
+
+        public int IntUsedRegisters => _intUsedRegisters;
+        public int VecUsedRegisters => _vecUsedRegisters;
+        public int SpillRegionSize  => _spillRegionSize;
+
+        // NOTE(review): the two "used registers" values look like per-register bit masks rather
+        // than counts, and the spill size is presumably in bytes - confirm against the allocators.
+        public AllocationResult(
+            int intUsedRegisters,
+            int vecUsedRegisters,
+            int spillRegionSize)
+        {
+            _intUsedRegisters = intUsedRegisters;
+            _vecUsedRegisters = vecUsedRegisters;
+            _spillRegionSize  = spillRegionSize;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs b/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs
new file mode 100644
index 000000000..65901e80c
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs
@@ -0,0 +1,273 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+    /// <summary>
+    /// Gathers the moves requested through AddSplit (spills, register copies and fills)
+    /// and turns them into an ordered array of operations.
+    /// </summary>
+    class CopyResolver
+    {
+        // Set of register to register copies that are meant to happen all at once.
+        private class ParallelCopy
+        {
+            private struct Copy
+            {
+                public Register Dest   { get; }
+                public Register Source { get; }
+
+                public OperandType Type { get; }
+
+                public Copy(Register dest, Register source, OperandType type)
+                {
+                    Dest   = dest;
+                    Source = source;
+                    Type   = type;
+                }
+            }
+
+            private readonly List<Copy> _copies;
+
+            public int Count => _copies.Count;
+
+            public ParallelCopy()
+            {
+                _copies = new List<Copy>();
+            }
+
+            public void AddCopy(Register dest, Register source, OperandType type)
+            {
+                _copies.Add(new Copy(dest, source, type));
+            }
+
+            // Appends to "sequence" the copies and swaps that carry out every registered copy.
+            public void Sequence(List<Operation> sequence)
+            {
+                // locations: register that currently holds the value originally on the key register.
+                // sources:   source register of the copy whose destination is the key register.
+                Dictionary<Register, Register> locations = new Dictionary<Register, Register>();
+                Dictionary<Register, Register> sources   = new Dictionary<Register, Register>();
+
+                Dictionary<Register, OperandType> types = new Dictionary<Register, OperandType>();
+
+                Queue<Register> pendingQueue = new Queue<Register>();
+                Queue<Register> readyQueue   = new Queue<Register>();
+
+                foreach (Copy copy in _copies)
+                {
+                    locations[copy.Source] = copy.Source;
+                    sources[copy.Dest]     = copy.Source;
+                    types[copy.Dest]       = copy.Type;
+
+                    pendingQueue.Enqueue(copy.Dest);
+                }
+
+                foreach (Copy copy in _copies)
+                {
+                    // If the destination is not used anywhere, we can assign it immediately.
+                    if (!locations.ContainsKey(copy.Dest))
+                    {
+                        readyQueue.Enqueue(copy.Dest);
+                    }
+                }
+
+                while (pendingQueue.TryDequeue(out Register current))
+                {
+                    Register copyDest;
+                    Register origSource;
+                    Register copySource;
+
+                    // Emit all the copies whose destination can be safely overwritten right now.
+                    while (readyQueue.TryDequeue(out copyDest))
+                    {
+                        origSource = sources[copyDest];
+                        copySource = locations[origSource];
+
+                        OperandType type = types[copyDest];
+
+                        EmitCopy(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));
+
+                        locations[origSource] = copyDest;
+
+                        // The value was just read from its original register, so if that register
+                        // is also the destination of another copy, that copy can now be emitted.
+                        if (origSource == copySource && sources.ContainsKey(origSource))
+                        {
+                            readyQueue.Enqueue(origSource);
+                        }
+                    }
+
+                    copyDest   = current;
+                    origSource = sources[copyDest];
+                    copySource = locations[origSource];
+
+                    // The wanted value still lives on another register: exchange both registers.
+                    if (copyDest != copySource)
+                    {
+                        OperandType type = types[copyDest];
+
+                        // The swap always uses I64 for integer registers, and V128 for the others.
+                        type = type.IsInteger() ? OperandType.I64 : OperandType.V128;
+
+                        EmitXorSwap(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));
+
+                        locations[origSource] = copyDest;
+
+                        Register swapOther = copySource;
+
+                        if (copyDest != locations[sources[copySource]])
+                        {
+                            // Find the other swap destination register.
+                            // To do that, we search all the pending registers, and pick
+                            // the one where the copy source register is equal to the
+                            // current destination register being processed (copyDest).
+                            foreach (Register pending in pendingQueue)
+                            {
+                                // Is this a copy of pending <- copyDest?
+                                if (copyDest == locations[sources[pending]])
+                                {
+                                    swapOther = pending;
+
+                                    break;
+                                }
+                            }
+                        }
+
+                        // The value that was previously at "copyDest" now lives on
+                        // "copySource" thanks to the swap, now we need to update the
+                        // location for the next copy that is supposed to copy the value
+                        // that used to live on "copyDest".
+                        locations[sources[swapOther]] = copySource;
+                    }
+                }
+            }
+
+            private static void EmitCopy(List<Operation> sequence, Operand x, Operand y)
+            {
+                sequence.Add(new Operation(Instruction.Copy, x, y));
+            }
+
+            // Exchanges x and y without a temporary, using three exclusive or operations.
+            private static void EmitXorSwap(List<Operation> sequence, Operand x, Operand y)
+            {
+                sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, x, x, y));
+                sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, y, y, x));
+                sequence.Add(new Operation(Instruction.BitwiseExclusiveOr, x, x, y));
+            }
+        }
+
+        // All three are created exactly once, by the constructor.
+        private readonly Queue<Operation> _fillQueue;
+        private readonly Queue<Operation> _spillQueue;
+
+        private readonly ParallelCopy _parallelCopy;
+
+        /// <summary>True once at least one fill, spill or copy was added.</summary>
+        public bool HasCopy { get; private set; }
+
+        public CopyResolver()
+        {
+            _fillQueue  = new Queue<Operation>();
+            _spillQueue = new Queue<Operation>();
+
+            _parallelCopy = new ParallelCopy();
+        }
+
+        /// <summary>
+        /// Records the fill, spill or register copy needed to go from the location of
+        /// <paramref name="left"/> to the location of <paramref name="right"/>, if any is needed.
+        /// </summary>
+        /// <exception cref="ArgumentException">
+        /// The intervals belong to different variables, or no move applies and the spill offsets differ.
+        /// </exception>
+        public void AddSplit(LiveInterval left, LiveInterval right)
+        {
+            if (left.Local != right.Local)
+            {
+                throw new ArgumentException("Intervals of different variables are not allowed.");
+            }
+
+            OperandType type = left.Local.Type;
+
+            if (left.IsSpilled && !right.IsSpilled)
+            {
+                // Move from the stack to a register.
+                AddSplitFill(left, right, type);
+            }
+            else if (!left.IsSpilled && right.IsSpilled)
+            {
+                // Move from a register to the stack.
+                AddSplitSpill(left, right, type);
+            }
+            else if (!left.IsSpilled && !right.IsSpilled && left.Register != right.Register)
+            {
+                // Move from one register to another.
+                AddSplitCopy(left, right, type);
+            }
+            else if (left.SpillOffset != right.SpillOffset)
+            {
+                // This would be the stack-to-stack move case, but this is not supported.
+                throw new ArgumentException("Both intervals were spilled.");
+            }
+        }
+
+        private void AddSplitFill(LiveInterval left, LiveInterval right, OperandType type)
+        {
+            Operand register = GetRegister(right.Register, type);
+
+            Operand offset = new Operand(left.SpillOffset);
+
+            _fillQueue.Enqueue(new Operation(Instruction.Fill, register, offset));
+
+            HasCopy = true;
+        }
+
+        private void AddSplitSpill(LiveInterval left, LiveInterval right, OperandType type)
+        {
+            Operand offset = new Operand(right.SpillOffset);
+
+            Operand register = GetRegister(left.Register, type);
+
+            _spillQueue.Enqueue(new Operation(Instruction.Spill, null, offset, register));
+
+            HasCopy = true;
+        }
+
+        private void AddSplitCopy(LiveInterval left, LiveInterval right, OperandType type)
+        {
+            _parallelCopy.AddCopy(right.Register, left.Register, type);
+
+            HasCopy = true;
+        }
+
+        /// <summary>
+        /// Builds the final operation list: spills first, then register copies, then fills.
+        /// The spill and fill queues are emptied by this call.
+        /// </summary>
+        public Operation[] Sequence()
+        {
+            List<Operation> sequence = new List<Operation>();
+
+            while (_spillQueue.TryDequeue(out Operation spillOp))
+            {
+                sequence.Add(spillOp);
+            }
+
+            _parallelCopy.Sequence(sequence);
+
+            while (_fillQueue.TryDequeue(out Operation fillOp))
+            {
+                sequence.Add(fillOp);
+            }
+
+            return sequence.ToArray();
+        }
+
+        private static Operand GetRegister(Register reg, OperandType type)
+        {
+            return new Operand(reg.Index, reg.Type, type);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
new file mode 100644
index 000000000..9a827420b
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
@@ -0,0 +1,382 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+    class HybridAllocator : IRegisterAllocator
+    {
+        private const int RegistersCount = 16;
+        private const int MaxIROperands  = 4;
+
+        // Immutable per-block summary collected by the first pass of RunPass.
+        private struct BlockInfo
+        {
+            // True when the block contains at least one Call operation.
+            public bool HasCall { get; }
+
+            // Bit masks (bit N = register index N) of the integer and vector registers
+            // that appear as explicit register destinations inside the block.
+            public int IntFixedRegisters { get; }
+            public int VecFixedRegisters { get; }
+
+            public BlockInfo(bool hasCall, int intFixedRegisters, int vecFixedRegisters)
+            {
+                IntFixedRegisters = intFixedRegisters;
+                VecFixedRegisters = vecFixedRegisters;
+                HasCall           = hasCall;
+            }
+        }
+
+        // Allocation state tracked for a single local variable.
+        private class LocalInfo
+        {
+            // Lowest and highest block index passed to SetBlockIndex; -1 means "none yet".
+            private int _first = -1;
+            private int _last  = -1;
+
+            // Total number of references expected for the local (set on construction).
+            public int Uses { get; set; }
+
+            // Number of references processed so far.
+            public int UseCount { get; set; }
+
+            public bool PreAllocated { get; set; }
+
+            // Register index assigned to the local. RunPass stores -1 here when the local
+            // lives on the stack, in which case SpillOffset holds its stack offset.
+            public int Register    { get; set; }
+            public int SpillOffset { get; set; }
+
+            // Sequence number of the node for which Temp was last selected.
+            public int Sequence { get; set; }
+
+            // Spill temporary register operand last selected for this local.
+            public Operand Temp { get; set; }
+
+            public OperandType Type { get; }
+
+            // True when the lowest and highest recorded block index are the same.
+            public bool IsBlockLocal => _first == _last;
+
+            public LocalInfo(OperandType type, int uses)
+            {
+                Type = type;
+                Uses = uses;
+            }
+
+            // Widens the recorded [first, last] block index range so it includes blkIndex.
+            public void SetBlockIndex(int blkIndex)
+            {
+                _first = (_first == -1 || blkIndex < _first) ? blkIndex : _first;
+                _last  = (_last  == -1 || blkIndex > _last)  ? blkIndex : _last;
+            }
+        }
+
+        // Allocates a register or a stack slot for every local variable of the CFG.
+        //
+        // Two passes are made over the blocks, both walking cfg.PostOrderBlocks from the
+        // last element to the first:
+        //  1. Locals are numbered, the blocks referencing each local are recorded, and for
+        //     each block we collect whether it has a Call and which registers are written
+        //     through explicit register destinations.
+        //  2. A local that is only referenced inside a single block gets a free register
+        //     when one is available. Any other local gets a stack slot, and every node
+        //     touching it goes through a spill temporary register, with a Fill inserted
+        //     before the node for sources and a Spill inserted after it for destinations.
+        //
+        // Returns the masks of integer and vector registers that were used, along with the
+        // total stack size reported by stackAlloc.
+        public AllocationResult RunPass(
+            ControlFlowGraph cfg,
+            StackAllocator stackAlloc,
+            RegisterMasks regMasks)
+        {
+            int intUsedRegisters = 0;
+            int vecUsedRegisters = 0;
+
+            int intFreeRegisters = regMasks.IntAvailableRegisters;
+            int vecFreeRegisters = regMasks.VecAvailableRegisters;
+
+            BlockInfo[] blockInfo = new BlockInfo[cfg.Blocks.Count];
+
+            // Indexed by (local number - 1), local numbers start at 1.
+            List<LocalInfo> locInfo = new List<LocalInfo>();
+
+            // First pass: number locals and gather per-block information.
+            for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+            {
+                BasicBlock block = cfg.PostOrderBlocks[index];
+
+                int intFixedRegisters = 0;
+                int vecFixedRegisters = 0;
+
+                bool hasCall = false;
+
+                foreach (Node node in block.Operations)
+                {
+                    if (node is Operation operation && operation.Instruction == Instruction.Call)
+                    {
+                        hasCall = true;
+                    }
+
+                    for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
+                    {
+                        Operand source = node.GetSource(srcIndex);
+
+                        if (source.Kind == OperandKind.LocalVariable)
+                        {
+                            // NOTE(review): this lookup assumes the local was already numbered
+                            // by a destination visited earlier; confirm that always holds.
+                            locInfo[source.AsInt32() - 1].SetBlockIndex(block.Index);
+                        }
+                    }
+
+                    for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++)
+                    {
+                        Operand dest = node.GetDestination(dstIndex);
+
+                        if (dest.Kind == OperandKind.LocalVariable)
+                        {
+                            LocalInfo info;
+
+                            // A non-zero value means the local already has a number.
+                            if (dest.Value != 0)
+                            {
+                                info = locInfo[dest.AsInt32() - 1];
+                            }
+                            else
+                            {
+                                dest.NumberLocal(locInfo.Count + 1);
+
+                                info = new LocalInfo(dest.Type, UsesCount(dest));
+
+                                locInfo.Add(info);
+                            }
+
+                            info.SetBlockIndex(block.Index);
+                        }
+                        else if (dest.Kind == OperandKind.Register)
+                        {
+                            // Registers written explicitly are recorded as fixed for this block.
+                            if (dest.Type.IsInteger())
+                            {
+                                intFixedRegisters |= 1 << dest.GetRegister().Index;
+                            }
+                            else
+                            {
+                                vecFixedRegisters |= 1 << dest.GetRegister().Index;
+                            }
+                        }
+                    }
+                }
+
+                blockInfo[block.Index] = new BlockInfo(hasCall, intFixedRegisters, vecFixedRegisters);
+            }
+
+            // Incremented once per processed node, used to know whether the spill temp
+            // stored on a LocalInfo was selected for the node currently being processed.
+            int sequence = 0;
+
+            // Second pass: replace the locals with registers, inserting fills and spills.
+            for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+            {
+                BasicBlock block = cfg.PostOrderBlocks[index];
+
+                BlockInfo blkInfo = blockInfo[block.Index];
+
+                // Fixed registers of the block are never handed out to locals.
+                int intLocalFreeRegisters = intFreeRegisters & ~blkInfo.IntFixedRegisters;
+                int vecLocalFreeRegisters = vecFreeRegisters & ~blkInfo.VecFixedRegisters;
+
+                int intCallerSavedRegisters = blkInfo.HasCall ? regMasks.IntCallerSavedRegisters : 0;
+                int vecCallerSavedRegisters = blkInfo.HasCall ? regMasks.VecCallerSavedRegisters : 0;
+
+                // Spill temps are taken from the caller saved registers first (only when the
+                // block has a call), and then from the remaining free registers.
+                int intSpillTempRegisters = SelectSpillTemps(
+                    intCallerSavedRegisters & ~blkInfo.IntFixedRegisters,
+                    intLocalFreeRegisters);
+                int vecSpillTempRegisters = SelectSpillTemps(
+                    vecCallerSavedRegisters & ~blkInfo.VecFixedRegisters,
+                    vecLocalFreeRegisters);
+
+                // Neither the spill temps nor the caller saved registers can hold locals.
+                intLocalFreeRegisters &= ~(intSpillTempRegisters | intCallerSavedRegisters);
+                vecLocalFreeRegisters &= ~(vecSpillTempRegisters | vecCallerSavedRegisters);
+
+                for (LinkedListNode<Node> llNode = block.Operations.First; llNode != null; llNode = llNode.Next)
+                {
+                    Node node = llNode.Value;
+
+                    // Masks of the spill temps already taken by the sources of this node.
+                    int intLocalUse = 0;
+                    int vecLocalUse = 0;
+
+                    for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
+                    {
+                        Operand source = node.GetSource(srcIndex);
+
+                        if (source.Kind != OperandKind.LocalVariable)
+                        {
+                            continue;
+                        }
+
+                        LocalInfo info = locInfo[source.AsInt32() - 1];
+
+                        info.UseCount++;
+
+                        Debug.Assert(info.UseCount <= info.Uses);
+
+                        if (info.Register != -1)
+                        {
+                            // The local lives in a register, use it directly.
+                            node.SetSource(srcIndex, Register(info.Register, source.Type.ToRegisterType(), source.Type));
+
+                            // This was the last reference, so the register becomes free again.
+                            if (info.UseCount == info.Uses && !info.PreAllocated)
+                            {
+                                if (source.Type.IsInteger())
+                                {
+                                    intLocalFreeRegisters |= 1 << info.Register;
+                                }
+                                else
+                                {
+                                    vecLocalFreeRegisters |= 1 << info.Register;
+                                }
+                            }
+                        }
+                        else
+                        {
+                            // The local lives on the stack. Reuse the temp when one was already
+                            // selected for this same node, otherwise pick a new one.
+                            Operand temp = info.Temp;
+
+                            if (temp == null || info.Sequence != sequence)
+                            {
+                                temp = source.Type.IsInteger()
+                                    ? GetSpillTemp(source, intSpillTempRegisters, ref intLocalUse)
+                                    : GetSpillTemp(source, vecSpillTempRegisters, ref vecLocalUse);
+
+                                info.Sequence = sequence;
+                                info.Temp     = temp;
+                            }
+
+                            node.SetSource(srcIndex, temp);
+
+                            // Load the value from the stack right before the node.
+                            Operation fillOp = new Operation(Instruction.Fill, temp, Const(info.SpillOffset));
+
+                            block.Operations.AddBefore(llNode, fillOp);
+                        }
+                    }
+
+                    // Masks of the spill temps already taken by the destinations of this node.
+                    // They are tracked separately from the source masks, so a destination may
+                    // get the same temp register as a source.
+                    int intLocalAsg = 0;
+                    int vecLocalAsg = 0;
+
+                    for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++)
+                    {
+                        Operand dest = node.GetDestination(dstIndex);
+
+                        if (dest.Kind != OperandKind.LocalVariable)
+                        {
+                            continue;
+                        }
+
+                        LocalInfo info = locInfo[dest.AsInt32() - 1];
+
+                        // First reference to the local: decide between a register and the stack.
+                        if (info.UseCount == 0 && !info.PreAllocated)
+                        {
+                            int mask = dest.Type.IsInteger()
+                                ? intLocalFreeRegisters
+                                : vecLocalFreeRegisters;
+
+                            if (info.IsBlockLocal && mask != 0)
+                            {
+                                int selectedReg = BitUtils.LowestBitSet(mask);
+
+                                info.Register = selectedReg;
+
+                                if (dest.Type.IsInteger())
+                                {
+                                    intLocalFreeRegisters &= ~(1 << selectedReg);
+                                    intUsedRegisters      |=   1 << selectedReg;
+                                }
+                                else
+                                {
+                                    vecLocalFreeRegisters &= ~(1 << selectedReg);
+                                    vecUsedRegisters      |=   1 << selectedReg;
+                                }
+                            }
+                            else
+                            {
+                                // Referenced from more than one block, or no register is free.
+                                info.Register    = -1;
+                                info.SpillOffset = stackAlloc.Allocate(dest.Type.GetSizeInBytes());
+                            }
+                        }
+
+                        info.UseCount++;
+
+                        Debug.Assert(info.UseCount <= info.Uses);
+
+                        if (info.Register != -1)
+                        {
+                            node.SetDestination(dstIndex, Register(info.Register, dest.Type.ToRegisterType(), dest.Type));
+                        }
+                        else
+                        {
+                            Operand temp = info.Temp;
+
+                            if (temp == null || info.Sequence != sequence)
+                            {
+                                temp = dest.Type.IsInteger()
+                                    ? GetSpillTemp(dest, intSpillTempRegisters, ref intLocalAsg)
+                                    : GetSpillTemp(dest, vecSpillTempRegisters, ref vecLocalAsg);
+
+                                info.Sequence = sequence;
+                                info.Temp     = temp;
+                            }
+
+                            node.SetDestination(dstIndex, temp);
+
+                            Operation spillOp = new Operation(Instruction.Spill, null, Const(info.SpillOffset), temp);
+
+                            // Store the value right after the node. Advancing llNode also makes
+                            // the outer loop skip the spill that was just inserted.
+                            llNode = block.Operations.AddAfter(llNode, spillOp);
+                        }
+                    }
+
+                    sequence++;
+
+                    // Spill temps that were actually selected also count as used registers.
+                    intUsedRegisters |= intLocalAsg | intLocalUse;
+                    vecUsedRegisters |= vecLocalAsg | vecLocalUse;
+                }
+            }
+
+            return new AllocationResult(intUsedRegisters, vecUsedRegisters, stackAlloc.TotalSize);
+        }
+
+        // Builds a mask with up to MaxIROperands registers to be used as spill temps.
+        // Registers are taken from the lowest set bit upwards, first from mask0 and,
+        // if that is not enough, from mask1. Asserts that MaxIROperands were found.
+        private static int SelectSpillTemps(int mask0, int mask1)
+        {
+            int selection = 0;
+            int count     = 0;
+
+            // Moves the lowest set bits of "candidates" into the selection until
+            // the limit is reached or no candidate is left.
+            void TakeLowestBits(int candidates)
+            {
+                while (count < MaxIROperands && candidates != 0)
+                {
+                    int lowestBit = candidates & -candidates;
+
+                    selection  |= lowestBit;
+                    candidates &= ~lowestBit;
+
+                    count++;
+                }
+            }
+
+            TakeLowestBits(mask0);
+            TakeLowestBits(mask1);
+
+            Debug.Assert(count == MaxIROperands, "No enough registers for spill temps.");
+
+            return selection;
+        }
+
+        // Picks the lowest register that is set on freeMask and not yet set on useMask,
+        // marks it on useMask, and returns it as a register operand with the type of "local".
+        private static Operand GetSpillTemp(Operand local, int freeMask, ref int useMask)
+        {
+            int candidates = freeMask & ~useMask;
+
+            int tempIndex = BitUtils.LowestBitSet(candidates);
+
+            useMask |= 1 << tempIndex;
+
+            return Register(tempIndex, local.Type.ToRegisterType(), local.Type);
+        }
+
+        // Total number of references to the local: its assignments plus its uses.
+        private static int UsesCount(Operand local) =>
+            local.Assignments.Count + local.Uses.Count;
+
+        // Lazily enumerates the successors of a block: Next first, then Branch,
+        // skipping whichever of them is null.
+        private static IEnumerable<BasicBlock> Successors(BasicBlock block)
+        {
+            BasicBlock next = block.Next;
+
+            if (next != null)
+            {
+                yield return next;
+            }
+
+            BasicBlock branch = block.Branch;
+
+            if (branch != null)
+            {
+                yield return branch;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
new file mode 100644
index 000000000..8f236c253
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
@@ -0,0 +1,12 @@
+using ARMeilleure.Translation;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+    // Common entry point implemented by the register allocators of this namespace.
+    interface IRegisterAllocator
+    {
+        // Runs the allocator over the control flow graph, using stackAlloc to reserve
+        // stack space and regMasks to know which registers can be used.
+        // Returns the allocation result produced by the implementation.
+        AllocationResult RunPass(ControlFlowGraph cfg, StackAllocator stackAlloc, RegisterMasks regMasks);
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
new file mode 100644
index 000000000..6d5ecc141
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
@@ -0,0 +1,1019 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+    // Based on:
+    // "Linear Scan Register Allocation for the Java(tm) HotSpot Client Compiler".
+    // http://www.christianwimmer.at/Publications/Wimmer04a/Wimmer04a.pdf
+    class LinearScanAllocator : IRegisterAllocator
+    {
+        private const int InstructionGap     = 2;
+        private const int InstructionGapMask = InstructionGap - 1;
+
+        private const int RegistersCount = 16;
+
+        private HashSet<int> _blockEdges;
+
+        private LiveRange[] _blockRanges;
+
+        private BitMap[] _blockLiveIn;
+
+        private List<LiveInterval> _intervals;
+
+        private LiveInterval[] _parentIntervals;
+
+        private List<LinkedListNode<Node>> _operationNodes;
+
+        private int _operationsCount;
+
+        // State shared by the allocation routines during a single RunPass call.
+        private class AllocationContext
+        {
+            public RegisterMasks  Masks      { get; }
+            public StackAllocator StackAlloc { get; }
+
+            // Bit sets where each bit is an index on the intervals list.
+            public BitMap Active   { get; }
+            public BitMap Inactive { get; }
+
+            // Masks with one bit set for each register index that was handed out.
+            public int IntUsedRegisters { get; set; }
+            public int VecUsedRegisters { get; set; }
+
+            public AllocationContext(StackAllocator stackAlloc, RegisterMasks masks, int intervalsCount)
+            {
+                Masks      = masks;
+                StackAlloc = stackAlloc;
+
+                Active   = new BitMap(intervalsCount);
+                Inactive = new BitMap(intervalsCount);
+            }
+
+            public void MoveActiveToInactive(int bit) => Move(Active, Inactive, bit);
+
+            public void MoveInactiveToActive(int bit) => Move(Inactive, Active, bit);
+
+            // Clears the bit on the first bit map and sets it on the second one.
+            private static void Move(BitMap origin, BitMap target, int bit)
+            {
+                origin.Clear(bit);
+                target.Set(bit);
+            }
+        }
+
+        // Runs the linear scan allocation: numbers the locals, builds the live intervals,
+        // allocates each non-empty interval, replaces the locals of the intervals that were
+        // not spilled with registers, and finally inserts the copies required by splits.
+        // Returns the used register masks and the total stack size.
+        public AllocationResult RunPass(
+            ControlFlowGraph cfg,
+            StackAllocator stackAlloc,
+            RegisterMasks regMasks)
+        {
+            NumberLocals(cfg);
+
+            AllocationContext context = new AllocationContext(stackAlloc, regMasks, _intervals.Count);
+
+            BuildIntervals(cfg, context);
+
+            // The list can grow while it is processed (split children are inserted on it),
+            // so the count is read again on every iteration.
+            for (int index = 0; index < _intervals.Count; index++)
+            {
+                LiveInterval current = _intervals[index];
+
+                if (current.IsEmpty)
+                {
+                    continue;
+                }
+
+                if (!current.IsFixed)
+                {
+                    AllocateInterval(context, current, index);
+
+                    continue;
+                }
+
+                // Fixed intervals already have a register, they only need to be
+                // marked as active and have their register recorded as used.
+                context.Active.Set(index);
+
+                Register fixedRegister = current.Register;
+
+                if (fixedRegister.Type == RegisterType.Integer)
+                {
+                    context.IntUsedRegisters |= 1 << fixedRegister.Index;
+                }
+                else
+                {
+                    // Any other register type is handled as a vector register.
+                    context.VecUsedRegisters |= 1 << fixedRegister.Index;
+                }
+            }
+
+            // The first RegistersCount * 2 entries are skipped here (presumably the fixed
+            // intervals of the integer and vector registers, confirm on NumberLocals).
+            for (int index = RegistersCount * 2; index < _intervals.Count; index++)
+            {
+                LiveInterval interval = _intervals[index];
+
+                if (interval.IsSpilled)
+                {
+                    continue;
+                }
+
+                ReplaceLocalWithRegister(interval);
+            }
+
+            InsertSplitCopies();
+            InsertSplitCopiesAtEdges(cfg);
+
+            return new AllocationResult(
+                context.IntUsedRegisters,
+                context.VecUsedRegisters,
+                context.StackAlloc.TotalSize);
+        }
+
+        // Allocates a register for "current", which is at index cIndex on the intervals list.
+        // The active and inactive sets are first updated for the start position of current,
+        // then an allocation without spilling is tried, falling back to spilling on failure.
+        private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex)
+        {
+            // Check active intervals that already ended.
+            // Those that did not end, but do not cover the start of current, become inactive.
+            foreach (int iIndex in context.Active)
+            {
+                LiveInterval interval = _intervals[iIndex];
+
+                if (interval.GetEnd() < current.GetStart())
+                {
+                    context.Active.Clear(iIndex);
+                }
+                else if (!interval.Overlaps(current.GetStart()))
+                {
+                    context.MoveActiveToInactive(iIndex);
+                }
+            }
+
+            // Check inactive intervals that already ended or were reactivated.
+            foreach (int iIndex in context.Inactive)
+            {
+                LiveInterval interval = _intervals[iIndex];
+
+                if (interval.GetEnd() < current.GetStart())
+                {
+                    context.Inactive.Clear(iIndex);
+                }
+                else if (interval.Overlaps(current.GetStart()))
+                {
+                    context.MoveInactiveToActive(iIndex);
+                }
+            }
+
+            if (!TryAllocateRegWithoutSpill(context, current, cIndex))
+            {
+                AllocateRegWithSpill(context, current, cIndex);
+            }
+        }
+
+        // Tries to assign a register to "current" without spilling any other interval.
+        //
+        // For each register, freePositions holds the position until which it is free:
+        // int.MaxValue for available registers, 0 for registers that are not available or
+        // are held by an active interval, and the overlap position for registers held by
+        // inactive intervals that overlap current. The register with the highest position
+        // is selected.
+        //
+        // Returns false when the selected register is not free past the start of current.
+        // When it is free only for part of current, current is split at that position, and
+        // the remaining part is queued again (or spilled when it has no uses).
+        private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex)
+        {
+            RegisterType regType = current.Local.Type.ToRegisterType();
+
+            int availableRegisters = context.Masks.GetAvailableRegisters(regType);
+
+            // Entries of registers that are not available keep the default value of 0.
+            int[] freePositions = new int[RegistersCount];
+
+            for (int index = 0; index < RegistersCount; index++)
+            {
+                if ((availableRegisters & (1 << index)) != 0)
+                {
+                    freePositions[index] = int.MaxValue;
+                }
+            }
+
+            // Registers of active intervals are not free at all.
+            foreach (int iIndex in context.Active)
+            {
+                LiveInterval interval = _intervals[iIndex];
+
+                if (interval.Register.Type == regType)
+                {
+                    freePositions[interval.Register.Index] = 0;
+                }
+            }
+
+            // Registers of inactive intervals are free until they first overlap current.
+            foreach (int iIndex in context.Inactive)
+            {
+                LiveInterval interval = _intervals[iIndex];
+
+                if (interval.Register.Type == regType)
+                {
+                    int overlapPosition = interval.GetOverlapPosition(current);
+
+                    if (overlapPosition != LiveInterval.NotFound && freePositions[interval.Register.Index] > overlapPosition)
+                    {
+                        freePositions[interval.Register.Index] = overlapPosition;
+                    }
+                }
+            }
+
+            int selectedReg = GetHighestValueIndex(freePositions);
+
+            int selectedNextUse = freePositions[selectedReg];
+
+            // Intervals start and end at odd positions, unless they span an entire
+            // block, in which case they will have ranges at an even position.
+            // When an interval is loaded from the stack to a register, we can only
+            // do the split at an odd position, because otherwise the split interval
+            // that is inserted on the list to be processed may clobber a register
+            // used by the instruction at the same position as the split.
+            // The problem only happens when an interval ends exactly at this instruction,
+            // because otherwise they would interfere, and the register wouldn't be selected.
+            // When the interval is aligned and the above happens, there's no problem as
+            // the instruction that actually has the last use is the one
+            // before that position.
+            selectedNextUse &= ~InstructionGapMask;
+
+            if (selectedNextUse <= current.GetStart())
+            {
+                // The register is not free at the start of current.
+                return false;
+            }
+            else if (selectedNextUse < current.GetEnd())
+            {
+                // The register is only free for the first part of current, so split it.
+                Debug.Assert(selectedNextUse > current.GetStart(), "Trying to split interval at the start.");
+
+                LiveInterval splitChild = current.Split(selectedNextUse);
+
+                if (splitChild.UsesCount != 0)
+                {
+                    Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+                    InsertInterval(splitChild);
+                }
+                else
+                {
+                    Spill(context, splitChild);
+                }
+            }
+
+            current.Register = new Register(selectedReg, regType);
+
+            if (regType == RegisterType.Integer)
+            {
+                context.IntUsedRegisters |= 1 << selectedReg;
+            }
+            else /* if (regType == RegisterType.Vector) */
+            {
+                context.VecUsedRegisters |= 1 << selectedReg;
+            }
+
+            context.Active.Set(cIndex);
+
+            return true;
+        }
+
+        // Allocates a register for "current" when no register is free, by spilling.
+        //
+        // usePositions holds, for each register, the lowest position recorded for it here:
+        // the next use of the non-fixed intervals holding it, or its blocked position.
+        // blockedPositions holds the position from which the register is taken by a
+        // fixed interval, which can't be spilled. Entries of registers that are not
+        // available keep the default value of 0.
+        //
+        // The register with the highest use position is selected, and then either current
+        // itself is spilled (until its first use), or the intervals holding the selected
+        // register are split and spilled, with current also being split when a fixed
+        // interval blocks the register before the end of current.
+        private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex)
+        {
+            RegisterType regType = current.Local.Type.ToRegisterType();
+
+            int availableRegisters = context.Masks.GetAvailableRegisters(regType);
+
+            int[] usePositions     = new int[RegistersCount];
+            int[] blockedPositions = new int[RegistersCount];
+
+            for (int index = 0; index < RegistersCount; index++)
+            {
+                if ((availableRegisters & (1 << index)) != 0)
+                {
+                    usePositions[index] = int.MaxValue;
+
+                    blockedPositions[index] = int.MaxValue;
+                }
+            }
+
+            // Both helpers only ever lower the stored position.
+            void SetUsePosition(int index, int position)
+            {
+                usePositions[index] = Math.Min(usePositions[index], position);
+            }
+
+            // A blocked register is also considered in use from the same position.
+            void SetBlockedPosition(int index, int position)
+            {
+                blockedPositions[index] = Math.Min(blockedPositions[index], position);
+
+                SetUsePosition(index, position);
+            }
+
+            // Non-fixed active intervals: record their next use after the start of current.
+            foreach (int iIndex in context.Active)
+            {
+                LiveInterval interval = _intervals[iIndex];
+
+                if (!interval.IsFixed && interval.Register.Type == regType)
+                {
+                    int nextUse = interval.NextUseAfter(current.GetStart());
+
+                    if (nextUse != -1)
+                    {
+                        SetUsePosition(interval.Register.Index, nextUse);
+                    }
+                }
+            }
+
+            // Non-fixed inactive intervals: same as above, but only if they overlap current.
+            foreach (int iIndex in context.Inactive)
+            {
+                LiveInterval interval = _intervals[iIndex];
+
+                if (!interval.IsFixed && interval.Register.Type == regType && interval.Overlaps(current))
+                {
+                    int nextUse = interval.NextUseAfter(current.GetStart());
+
+                    if (nextUse != -1)
+                    {
+                        SetUsePosition(interval.Register.Index, nextUse);
+                    }
+                }
+            }
+
+            // Fixed active intervals block their register from position 0.
+            foreach (int iIndex in context.Active)
+            {
+                LiveInterval interval = _intervals[iIndex];
+
+                if (interval.IsFixed && interval.Register.Type == regType)
+                {
+                    SetBlockedPosition(interval.Register.Index, 0);
+                }
+            }
+
+            // Fixed inactive intervals block their register from where they overlap current.
+            foreach (int iIndex in context.Inactive)
+            {
+                LiveInterval interval = _intervals[iIndex];
+
+                if (interval.IsFixed && interval.Register.Type == regType)
+                {
+                    int overlapPosition = interval.GetOverlapPosition(current);
+
+                    if (overlapPosition != LiveInterval.NotFound)
+                    {
+                        SetBlockedPosition(interval.Register.Index, overlapPosition);
+                    }
+                }
+            }
+
+            int selectedReg = GetHighestValueIndex(usePositions);
+
+            int currentFirstUse = current.FirstUse();
+
+            Debug.Assert(currentFirstUse >= 0, "Current interval has no uses.");
+
+            if (usePositions[selectedReg] < currentFirstUse)
+            {
+                // All intervals on inactive and active are being used before current,
+                // so spill the current interval.
+                Debug.Assert(currentFirstUse > current.GetStart(), "Trying to spill a interval currently being used.");
+
+                // The part starting at the first use is queued again to get a register later.
+                LiveInterval splitChild = current.Split(currentFirstUse);
+
+                Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+                InsertInterval(splitChild);
+
+                Spill(context, current);
+            }
+            else if (blockedPositions[selectedReg] > current.GetEnd())
+            {
+                // Spill made the register available for the entire current lifetime,
+                // so we only need to split the intervals using the selected register.
+                current.Register = new Register(selectedReg, regType);
+
+                SplitAndSpillOverlappingIntervals(context, current);
+
+                context.Active.Set(cIndex);
+            }
+            else
+            {
+                // There are conflicts even after spill due to the use of fixed registers
+                // that can't be spilled, so we need to also split current at the point of
+                // the first fixed register use.
+                current.Register = new Register(selectedReg, regType);
+
+                // The blocked position is rounded down to a multiple of InstructionGap.
+                int splitPosition = blockedPositions[selectedReg] & ~InstructionGapMask;
+
+                Debug.Assert(splitPosition > current.GetStart(), "Trying to split a interval at a invalid position.");
+
+                LiveInterval splitChild = current.Split(splitPosition);
+
+                if (splitChild.UsesCount != 0)
+                {
+                    Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+                    InsertInterval(splitChild);
+                }
+                else
+                {
+                    Spill(context, splitChild);
+                }
+
+                SplitAndSpillOverlappingIntervals(context, current);
+
+                context.Active.Set(cIndex);
+            }
+        }
+
+        // Returns the index of the highest value on the array. When the highest value
+        // appears more than once, the lowest of its indices is returned. The search
+        // stops as soon as int.MaxValue is found, since nothing can be higher.
+        private static int GetHighestValueIndex(int[] array)
+        {
+            int bestIndex = 0;
+            int bestValue = array[0];
+
+            for (int index = 1; bestValue != int.MaxValue && index < array.Length; index++)
+            {
+                int candidate = array[index];
+
+                if (candidate > bestValue)
+                {
+                    bestValue = candidate;
+                    bestIndex = index;
+                }
+            }
+
+            return bestIndex;
+        }
+
+        // Splits and spills every non-fixed interval that holds the register assigned to
+        // "current": all of the active ones, and the inactive ones that overlap current.
+        // Each processed interval is removed from the set it was found on.
+        private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current)
+        {
+            foreach (int iIndex in context.Active)
+            {
+                LiveInterval candidate = _intervals[iIndex];
+
+                bool holdsRegister = !candidate.IsFixed && candidate.Register == current.Register;
+
+                if (!holdsRegister)
+                {
+                    continue;
+                }
+
+                SplitAndSpillOverlappingInterval(context, current, candidate);
+
+                context.Active.Clear(iIndex);
+            }
+
+            foreach (int iIndex in context.Inactive)
+            {
+                LiveInterval candidate = _intervals[iIndex];
+
+                bool conflicts =
+                    !candidate.IsFixed &&
+                    candidate.Register == current.Register &&
+                    candidate.Overlaps(current);
+
+                if (!conflicts)
+                {
+                    continue;
+                }
+
+                SplitAndSpillOverlappingInterval(context, current, candidate);
+
+                context.Inactive.Clear(iIndex);
+            }
+        }
+
+        // Spills the part of "interval" that starts at the start of "current".
+        // When the interval still has a use after that point, only the part before that
+        // use is spilled, and the part starting at the use is queued to get a register.
+        private void SplitAndSpillOverlappingInterval(
+            AllocationContext context,
+            LiveInterval      current,
+            LiveInterval      interval)
+        {
+            // The next use must be looked up before the interval is split below.
+            int nextUse = interval.NextUseAfter(current.GetStart());
+
+            // Part of the interval that starts at (or after) the start of current.
+            LiveInterval tail = interval.GetStart() < current.GetStart()
+                ? interval.Split(current.GetStart())
+                : interval;
+
+            if (nextUse == -1)
+            {
+                // No more uses, the whole tail can stay on the stack.
+                Spill(context, tail);
+
+                return;
+            }
+
+            Debug.Assert(nextUse > current.GetStart(), "Trying to spill a interval currently being used.");
+
+            if (nextUse > tail.GetStart())
+            {
+                // Second split: what comes before the use is spilled,
+                // what starts at the use is inserted on the list again.
+                LiveInterval fromNextUse = tail.Split(nextUse);
+
+                Spill(context, tail);
+
+                tail = fromNextUse;
+            }
+
+            InsertInterval(tail);
+        }
+
+        private void InsertInterval(LiveInterval interval)
+        {
+            Debug.Assert(interval.UsesCount != 0, "Trying to insert a interval without uses.");
+            Debug.Assert(!interval.IsEmpty,       "Trying to insert a empty interval.");
+            Debug.Assert(!interval.IsSpilled,     "Trying to insert a spilled interval.");
+
+            // The search skips the first RegistersCount * 2 entries of the list,
+            // so the interval is always inserted after them.
+            int firstIndex  = RegistersCount * 2;
+            int searchCount = _intervals.Count - firstIndex;
+            int foundIndex  = _intervals.BinarySearch(firstIndex, searchCount, interval, null);
+
+            // A negative result is the bitwise complement of the index to insert at.
+            int targetIndex = foundIndex < 0 ? ~foundIndex : foundIndex;
+
+            _intervals.Insert(targetIndex, interval);
+        }
+
+        private void Spill(AllocationContext context, LiveInterval interval)
+        {
+            Debug.Assert(!interval.IsFixed,       "Trying to spill a fixed interval.");
+            Debug.Assert(interval.UsesCount == 0, "Trying to spill a interval with uses.");
+
+            // Sharing the stack offset of a spilled sibling avoids stack-to-stack copies.
+            if (interval.TrySpillWithSiblingOffset())
+            {
+                return;
+            }
+
+            interval.Spill(context.StackAlloc.Allocate(interval.Local.Type));
+        }
+
+        private void InsertSplitCopies()
+        {
+            // Copies are grouped by split position, so that all the copies of
+            // one position are sequenced by a single resolver.
+            Dictionary<int, CopyResolver> copyResolvers = new Dictionary<int, CopyResolver>();
+
+            // Returns the resolver of a position, creating it only on the first request.
+            CopyResolver GetCopyResolver(int position)
+            {
+                if (!copyResolvers.TryGetValue(position, out CopyResolver copyResolver))
+                {
+                    copyResolver = new CopyResolver();
+                    copyResolvers.Add(position, copyResolver);
+                }
+
+                return copyResolver;
+            }
+
+            foreach (LiveInterval interval in _intervals.Where(x => x.IsSplit))
+            {
+                LiveInterval previous = interval;
+
+                foreach (LiveInterval splitChild in interval.SplitChilds())
+                {
+                    int splitPosition = splitChild.GetStart();
+
+                    // Block edge splits are not handled here, nor are childs that start after a gap.
+                    if (!_blockEdges.Contains(splitPosition) && previous.GetEnd() == splitPosition)
+                    {
+                        GetCopyResolver(splitPosition).AddSplit(previous, splitChild);
+                    }
+
+                    previous = splitChild;
+                }
+            }
+
+            foreach (KeyValuePair<int, CopyResolver> kv in copyResolvers)
+            {
+                if (!kv.Value.HasCopy)
+                {
+                    continue;
+                }
+
+                Operation[] sequence = kv.Value.Sequence();
+
+                // The first copy goes right before the operation at the split position,
+                // the remaining ones are chained after it to keep the sequence order.
+                LinkedListNode<Node> node = GetOperationNode(kv.Key);
+                node = node.List.AddBefore(node, sequence[0]);
+
+                for (int index = 1; index < sequence.Length; index++)
+                {
+                    node = node.List.AddAfter(node, sequence[index]);
+                }
+            }
+        }
+
+        private void InsertSplitCopiesAtEdges(ControlFlowGraph cfg)
+        {
+            // Adds the copies needed where a split interval changes pieces across a control flow edge.
+            int blocksCount = cfg.Blocks.Count;
+
+            // Blocks added by edge splitting are assumed to be indexed past the initial count.
+            bool IsSplitEdgeBlock(BasicBlock block)
+            {
+                return block.Index >= blocksCount;
+            }
+
+            for (LinkedListNode<BasicBlock> node = cfg.Blocks.First; node != null; node = node.Next)
+            {
+                BasicBlock block = node.Value;
+
+                if (IsSplitEdgeBlock(block))
+                {
+                    continue;
+                }
+
+                bool hasSingleOrNoSuccessor = block.Next == null || block.Branch == null;
+                int  lEnd                   = _blockRanges[block.Index].End - 1;
+
+                foreach (BasicBlock successor in Successors(block))
+                {
+                    int succIndex = successor.Index;
+
+                    // If the current node is a split node, then the actual successor node
+                    // (the successor before the split) should be right after it.
+                    if (IsSplitEdgeBlock(successor))
+                    {
+                        succIndex = Successors(successor).First().Index;
+                    }
+
+                    int rStart = _blockRanges[succIndex].Start;
+                    CopyResolver copyResolver = new CopyResolver();
+
+                    foreach (int iIndex in _blockLiveIn[succIndex])
+                    {
+                        LiveInterval interval = _parentIntervals[iIndex];
+
+                        if (!interval.IsSplit)
+                        {
+                            continue;
+                        }
+
+                        LiveInterval left  = interval.GetSplitChild(lEnd);
+                        LiveInterval right = interval.GetSplitChild(rStart);
+
+                        if (left != null && right != null && left != right)
+                        {
+                            copyResolver.AddSplit(left, right);
+                        }
+                    }
+
+                    if (!copyResolver.HasCopy)
+                    {
+                        continue;
+                    }
+
+                    Operation[] sequence = copyResolver.Sequence();
+
+                    if (hasSingleOrNoSuccessor)
+                    {
+                        // The edge is the only way out of the block, copy at its end.
+                        foreach (Operation operation in sequence)
+                        {
+                            block.Append(operation);
+                        }
+                    }
+                    else if (successor.Predecessors.Count == 1)
+                    {
+                        // The edge is the only way into the successor, copy at its start.
+                        LinkedListNode<Node> prependNode = successor.Operations.AddFirst(sequence[0]);
+                        for (int index = 1; index < sequence.Length; index++)
+                        {
+                            prependNode = successor.Operations.AddAfter(prependNode, sequence[index]);
+                        }
+                    }
+                    else
+                    {
+                        // Split the critical edge.
+                        BasicBlock splitBlock = cfg.SplitEdge(block, successor);
+
+                        foreach (Operation operation in sequence)
+                        {
+                            splitBlock.Append(operation);
+                        }
+                    }
+                }
+            }
+        }
+
+        private void ReplaceLocalWithRegister(LiveInterval current)
+        {
+            // The same register operand is shared by all the patched operations.
+            Operand physReg = GetRegister(current);
+            Operand local   = current.Local;
+
+            foreach (int position in current.UsePositions())
+            {
+                Node node = GetOperationNode(position).Value;
+
+                // Patch every source that reads the local...
+                for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
+                {
+                    if (node.GetSource(srcIndex) == local)
+                    {
+                        node.SetSource(srcIndex, physReg);
+                    }
+                }
+
+                // ...and every destination that writes it.
+                for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++)
+                {
+                    if (node.GetDestination(dstIndex) == local)
+                    {
+                        node.SetDestination(dstIndex, physReg);
+                    }
+                }
+            }
+        }
+
+        private static Operand GetRegister(LiveInterval interval)
+        {
+            Debug.Assert(!interval.IsSpilled, "Spilled intervals are not allowed.");
+
+            // The register operand takes its type from the local of the interval.
+            Register reg = interval.Register;
+
+            return new Operand(reg.Index, reg.Type, interval.Local.Type);
+        }
+
+        // Maps a position to the node of the operation that owns it, every
+        // operation being InstructionGap positions apart from the next one.
+        private LinkedListNode<Node> GetOperationNode(int position)
+            => _operationNodes[position / InstructionGap];
+
+        private void NumberLocals(ControlFlowGraph cfg)
+        {
+            _operationNodes  = new List<LinkedListNode<Node>>();
+            _intervals       = new List<LiveInterval>();
+            _operationsCount = 0;
+
+            // Fixed intervals come first, interleaved as integer/vector pairs,
+            // which is the layout that GetRegisterId relies on.
+            for (int regIndex = 0; regIndex < RegistersCount; regIndex++)
+            {
+                _intervals.Add(new LiveInterval(new Register(regIndex, RegisterType.Integer)));
+                _intervals.Add(new LiveInterval(new Register(regIndex, RegisterType.Vector)));
+            }
+
+            HashSet<Operand> numbered = new HashSet<Operand>();
+
+            // Blocks are visited in reverse post order, locals are numbered where first written.
+            for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+            {
+                BasicBlock block = cfg.PostOrderBlocks[index];
+
+                if (block.Operations.Count == 0)
+                {
+                    // Pretend we have a dummy instruction on the empty block.
+                    _operationNodes.Add(null);
+                    _operationsCount += InstructionGap;
+                    continue;
+                }
+
+                for (LinkedListNode<Node> node = block.Operations.First; node != null; node = node.Next)
+                {
+                    _operationNodes.Add(node);
+                    foreach (Operand dest in Destinations(node.Value))
+                    {
+                        if (dest.Kind != OperandKind.LocalVariable || !numbered.Add(dest))
+                        {
+                            continue;
+                        }
+
+                        // The number given to the local is the index of its interval on the list.
+                        dest.NumberLocal(_intervals.Count);
+                        _intervals.Add(new LiveInterval(dest));
+                    }
+                }
+
+                _operationsCount += block.Operations.Count * InstructionGap;
+            }
+
+            _parentIntervals = _intervals.ToArray();
+        }
+
+        private void BuildIntervals(ControlFlowGraph cfg, AllocationContext context)
+        {
+            _blockRanges = new LiveRange[cfg.Blocks.Count];
+            // Computes block liveness, then turns it into live ranges and use positions for every interval.
+            int mapSize = _intervals.Count;
+            // Bit maps have one bit per interval, the bit index is the operand id (see GetOperandId).
+            BitMap[] blkLiveGen  = new BitMap[cfg.Blocks.Count];
+            BitMap[] blkLiveKill = new BitMap[cfg.Blocks.Count];
+
+            // Compute local live sets.
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                BitMap liveGen  = new BitMap(mapSize);
+                BitMap liveKill = new BitMap(mapSize);
+
+                foreach (Node node in block.Operations)
+                {
+                    foreach (Operand source in Sources(node))
+                    {
+                        int id = GetOperandId(source);
+                        // A read only goes on the gen set if the block did not write the operand before it.
+                        if (!liveKill.IsSet(id))
+                        {
+                            liveGen.Set(id);
+                        }
+                    }
+
+                    foreach (Operand dest in Destinations(node))
+                    {
+                        liveKill.Set(GetOperandId(dest));
+                    }
+                }
+
+                blkLiveGen [block.Index] = liveGen;
+                blkLiveKill[block.Index] = liveKill;
+            }
+
+            // Compute global live sets.
+            BitMap[] blkLiveIn  = new BitMap[cfg.Blocks.Count];
+            BitMap[] blkLiveOut = new BitMap[cfg.Blocks.Count];
+
+            for (int index = 0; index < cfg.Blocks.Count; index++)
+            {
+                blkLiveIn [index] = new BitMap(mapSize);
+                blkLiveOut[index] = new BitMap(mapSize);
+            }
+            // Passes are repeated until no live out set changes (Set is expected to report changes).
+            bool modified;
+
+            do
+            {
+                modified = false;
+
+                for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+                {
+                    BasicBlock block = cfg.PostOrderBlocks[index];
+                    // Live out accumulates the live in sets of all the successors.
+                    BitMap liveOut = blkLiveOut[block.Index];
+
+                    foreach (BasicBlock successor in Successors(block))
+                    {
+                        if (liveOut.Set(blkLiveIn[successor.Index]))
+                        {
+                            modified = true;
+                        }
+                    }
+                    // Live in = (live out - kill) + gen, assuming Set/Clear with a bit map act as union/difference.
+                    BitMap liveIn = blkLiveIn[block.Index];
+
+                    liveIn.Set  (liveOut);
+                    liveIn.Clear(blkLiveKill[block.Index]);
+                    liveIn.Set  (blkLiveGen [block.Index]);
+                }
+            }
+            while (modified);
+
+            _blockLiveIn = blkLiveIn;
+
+            _blockEdges = new HashSet<int>();
+
+            // Compute lifetime intervals.
+            int operationPos = _operationsCount;
+            // Positions are handed out backwards, starting from the total count of positions.
+            for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+            {
+                BasicBlock block = cfg.PostOrderBlocks[index];
+
+                // We handle empty blocks by pretending they have a dummy instruction,
+                // because otherwise the block would have the same start and end position,
+                // and this is not valid.
+                int instCount = Math.Max(block.Operations.Count, 1);
+
+                int blockStart = operationPos - instCount * InstructionGap;
+                int blockEnd   = operationPos;
+
+                _blockRanges[block.Index] = new LiveRange(blockStart, blockEnd);
+
+                _blockEdges.Add(blockStart);
+                // What is live out starts as live on the whole block, definitions may move the start later.
+                BitMap liveOut = blkLiveOut[block.Index];
+
+                foreach (int id in liveOut)
+                {
+                    _intervals[id].AddRange(blockStart, blockEnd);
+                }
+
+                if (block.Operations.Count == 0)
+                {
+                    operationPos -= InstructionGap;
+
+                    continue;
+                }
+
+                foreach (Node node in BottomOperations(block))
+                {
+                    operationPos -= InstructionGap;
+                    // A definition makes the interval start one position after the operation position.
+                    foreach (Operand dest in Destinations(node))
+                    {
+                        LiveInterval interval = _intervals[GetOperandId(dest)];
+
+                        interval.SetStart(operationPos + 1);
+                        interval.AddUsePosition(operationPos + 1);
+                    }
+                    // A read keeps the interval live from the block start up to this operation.
+                    foreach (Operand source in Sources(node))
+                    {
+                        LiveInterval interval = _intervals[GetOperandId(source)];
+
+                        interval.AddRange(blockStart, operationPos + 1);
+                        interval.AddUsePosition(operationPos);
+                    }
+                    // A call adds a range to the fixed interval of every caller saved register.
+                    if (node is Operation operation && operation.Instruction == Instruction.Call)
+                    {
+                        AddIntervalCallerSavedReg(context.Masks.IntCallerSavedRegisters, operationPos, RegisterType.Integer);
+                        AddIntervalCallerSavedReg(context.Masks.VecCallerSavedRegisters, operationPos, RegisterType.Vector);
+                    }
+                }
+            }
+        }
+
+        private void AddIntervalCallerSavedReg(int mask, int operationPos, RegisterType regType)
+        {
+            int rangeStart = operationPos + 1;
+            int rangeEnd   = operationPos + InstructionGap;
+
+            // Each bit set on the mask is taken as a register index, and cleared once handled.
+            while (mask != 0)
+            {
+                int bit = BitUtils.LowestBitSet(mask);
+
+                mask &= ~(1 << bit);
+
+                _intervals[GetRegisterId(new Register(bit, regType))].AddRange(rangeStart, rangeEnd);
+            }
+        }
+
+        private static int GetOperandId(Operand operand)
+        {
+            // The id is what indexes the intervals list: AsInt32 for locals, GetRegisterId for registers.
+            switch (operand.Kind)
+            {
+                case OperandKind.LocalVariable:
+                    return operand.AsInt32();
+
+                case OperandKind.Register:
+                    return GetRegisterId(operand.GetRegister());
+
+                default:
+                    throw new ArgumentException($"Invalid operand kind \"{operand.Kind}\".");
+            }
+        }
+
+        // Ids interleave both register files: index * 2 + 1 for vector
+        // registers, and index * 2 for registers of any other type.
+        private static int GetRegisterId(Register register)
+            => register.Type == RegisterType.Vector ? (register.Index << 1) | 1 : register.Index << 1;
+
+        private static IEnumerable<BasicBlock> Successors(BasicBlock block)
+        {
+            if (block.Next is BasicBlock next)
+            {
+                yield return next;
+            }
+            // Branch is only read once the caller asks for the element that follows Next.
+            if (block.Branch is BasicBlock branch)
+            {
+                yield return branch;
+            }
+        }
+
+        private static IEnumerable<Node> BottomOperations(BasicBlock block)
+        {
+            // Operations are returned from last to first, and the walk
+            // stops as soon as a phi node is reached.
+            LinkedListNode<Node> node = block.Operations.Last;
+
+            for (; node != null && !(node.Value is PhiNode); node = node.Previous)
+            {
+                yield return node.Value;
+            }
+        }
+
+        private static IEnumerable<Operand> Destinations(Node node)
+        {
+            for (int dstIndex = 0; dstIndex < node.DestinationsCount; dstIndex++)
+            {
+                yield return node.GetDestination(dstIndex); // Returned whatever the operand kind is.
+            }
+        }
+
+        private static IEnumerable<Operand> Sources(Node node)
+        {
+            // Sources of any other kind (not a local nor a register) are left out.
+            for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
+            {
+                Operand operand = node.GetSource(srcIndex);
+                if (operand.Kind == OperandKind.LocalVariable || operand.Kind == OperandKind.Register)
+                {
+                    yield return operand;
+                }
+            }
+        }
+
+        private static bool IsLocalOrRegister(OperandKind kind)
+        {
+            // Those are the only two operand kinds that GetOperandId accepts.
+            return kind == OperandKind.Register || kind == OperandKind.LocalVariable;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
new file mode 100644
index 000000000..18858a768
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
@@ -0,0 +1,390 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+    // Set of ranges where a local variable, or a fixed register, is live, along with
+    // its use positions. Splitting an interval creates new intervals (the "childs"),
+    // all of them are registered on the interval that was originally split (the "parent").
+    class LiveInterval : IComparable<LiveInterval>
+    {
+        public const int NotFound = -1;
+
+        // Interval that the split childs are registered on, "this" for non-child intervals.
+        private LiveInterval _parent;
+
+        private SortedSet<int> _usePositions;
+
+        public int UsesCount => _usePositions.Count;
+
+        // Kept sorted, with no overlap between ranges, as the binary searches below need.
+        private List<LiveRange> _ranges;
+
+        // Split childs keyed by their start position, only the parent has them.
+        private SortedList<int, LiveInterval> _childs;
+
+        public bool IsSplit => _childs.Count != 0;
+
+        public Operand Local { get; }
+
+        public Register Register { get; set; }
+
+        // Offset that the interval was spilled to, or -1 if it was not spilled.
+        public int SpillOffset { get; private set; }
+
+        public bool IsSpilled => SpillOffset != -1;
+        public bool IsFixed { get; }
+
+        public bool IsEmpty => _ranges.Count == 0;
+
+        public LiveInterval(Operand local = null, LiveInterval parent = null)
+        {
+            Local   = local;
+            _parent = parent ?? this;
+
+            _usePositions = new SortedSet<int>();
+            _ranges       = new List<LiveRange>();
+            _childs       = new SortedList<int, LiveInterval>();
+
+            SpillOffset = -1;
+        }
+
+        // Creates the fixed interval of a register.
+        public LiveInterval(Register register) : this()
+        {
+            IsFixed  = true;
+            Register = register;
+        }
+
+        // Moves the start of the first range, an empty interval gets a range of one position.
+        public void SetStart(int position)
+        {
+            if (_ranges.Count != 0)
+            {
+                Debug.Assert(position != _ranges[0].End);
+
+                _ranges[0] = new LiveRange(position, _ranges[0].End);
+            }
+            else
+            {
+                _ranges.Add(new LiveRange(position, position + 1));
+            }
+        }
+
+        // Start of the first range, throws when the interval is empty.
+        public int GetStart()
+        {
+            if (_ranges.Count == 0)
+            {
+                throw new InvalidOperationException("Empty interval.");
+            }
+
+            return _ranges[0].Start;
+        }
+
+        // Moves the end of the last range, an empty interval gets a range of one position.
+        public void SetEnd(int position)
+        {
+            if (_ranges.Count != 0)
+            {
+                int lastIdx = _ranges.Count - 1;
+
+                Debug.Assert(position != _ranges[lastIdx].Start);
+
+                _ranges[lastIdx] = new LiveRange(_ranges[lastIdx].Start, position);
+            }
+            else
+            {
+                _ranges.Add(new LiveRange(position, position + 1));
+            }
+        }
+
+        // End (exclusive) of the last range, throws when the interval is empty.
+        public int GetEnd()
+        {
+            if (_ranges.Count == 0)
+            {
+                throw new InvalidOperationException("Empty interval.");
+            }
+
+            return _ranges[_ranges.Count - 1].End;
+        }
+
+        // Adds the [start, end[ range, merging it with the ranges it intersects or touches.
+        public void AddRange(int start, int end)
+        {
+            if (start >= end)
+            {
+                throw new ArgumentException("Invalid range start position " + start + ", " + end);
+            }
+
+            int index = _ranges.BinarySearch(new LiveRange(start, end));
+
+            if (index >= 0)
+            {
+                // New range intersects with an existing range, we need to remove
+                // all the intersecting ranges before adding the new one.
+                // We also extend the new range as needed, based on the values of
+                // the existing ranges being removed.
+                int lIndex = index;
+                int rIndex = index;
+
+                while (lIndex > 0 && _ranges[lIndex - 1].End >= start)
+                {
+                    lIndex--;
+                }
+
+                while (rIndex + 1 < _ranges.Count && _ranges[rIndex + 1].Start <= end)
+                {
+                    rIndex++;
+                }
+
+                if (start > _ranges[lIndex].Start)
+                {
+                    start = _ranges[lIndex].Start;
+                }
+
+                if (end < _ranges[rIndex].End)
+                {
+                    end = _ranges[rIndex].End;
+                }
+
+                _ranges.RemoveRange(lIndex, (rIndex - lIndex) + 1);
+
+                InsertRange(lIndex, start, end);
+            }
+            else
+            {
+                InsertRange(~index, start, end);
+            }
+        }
+
+        private void InsertRange(int index, int start, int end)
+        {
+            // Here we insert a new range on the ranges list.
+            // If possible, we extend an existing range rather than inserting a new one.
+            // We can extend an existing range if any of the following conditions are true:
+            // - The new range starts right after the end of the previous range on the list.
+            // - The new range ends right before the start of the next range on the list.
+            // If both cases are true, we can extend either one. We prefer to extend the
+            // previous range, and then remove the next one, but there's no specific reason
+            // for that, extending either one will do.
+            int? extIndex = null;
+
+            if (index > 0 && _ranges[index - 1].End == start)
+            {
+                start = _ranges[index - 1].Start;
+
+                extIndex = index - 1;
+            }
+
+            if (index < _ranges.Count && _ranges[index].Start == end)
+            {
+                end = _ranges[index].End;
+
+                if (extIndex.HasValue)
+                {
+                    _ranges.RemoveAt(index);
+                }
+                else
+                {
+                    extIndex = index;
+                }
+            }
+
+            if (extIndex.HasValue)
+            {
+                _ranges[extIndex.Value] = new LiveRange(start, end);
+            }
+            else
+            {
+                _ranges.Insert(index, new LiveRange(start, end));
+            }
+        }
+
+        public void AddUsePosition(int position) => _usePositions.Add(position);
+
+        // LiveRange compares as equal to the ranges it intersects, so a match means overlap.
+        public bool Overlaps(int position)
+            => _ranges.BinarySearch(new LiveRange(position, position + 1)) >= 0;
+
+        // True when any range of the other interval intersects a range of this one.
+        public bool Overlaps(LiveInterval other)
+        {
+            foreach (LiveRange range in other._ranges)
+            {
+                if (_ranges.BinarySearch(range) >= 0)
+                {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        // Start of the lowest own range overlapping the first overlapped range of "other", or NotFound.
+        public int GetOverlapPosition(LiveInterval other)
+        {
+            foreach (LiveRange range in other._ranges)
+            {
+                int overlapIndex = _ranges.BinarySearch(range);
+
+                if (overlapIndex >= 0)
+                {
+                    // It's possible that we have multiple overlaps within a single interval,
+                    // in this case, we pick the one with the lowest start position, since
+                    // we return the first overlap position.
+                    while (overlapIndex > 0 && _ranges[overlapIndex - 1].End > range.Start)
+                    {
+                        overlapIndex--;
+                    }
+
+                    LiveRange overlappingRange = _ranges[overlapIndex];
+
+                    return overlappingRange.Start;
+                }
+            }
+
+            return NotFound;
+        }
+
+        public IEnumerable<LiveInterval> SplitChilds() => _childs.Values;
+
+        public IEnumerable<int> UsePositions() => _usePositions;
+
+        public int FirstUse()
+        {
+            return _usePositions.Count != 0 ? _usePositions.First() : NotFound;
+        }
+
+        // First use at or after the position, or NotFound when there is none.
+        public int NextUseAfter(int position)
+        {
+            foreach (int usePosition in _usePositions)
+            {
+                if (usePosition >= position)
+                {
+                    return usePosition;
+                }
+            }
+
+            return NotFound;
+        }
+
+        // Splits the interval at the position. Ranges and uses before the position stay
+        // on this interval, the rest moves to the returned interval, which is also
+        // added to the split childs of the parent.
+        public LiveInterval Split(int position)
+        {
+            LiveInterval right = new LiveInterval(Local, _parent);
+
+            int splitIndex = 0;
+
+            for (; splitIndex < _ranges.Count; splitIndex++)
+            {
+                LiveRange range = _ranges[splitIndex];
+
+                // Only a position strictly inside of the range cuts it in two. A position
+                // equal to the range end falls between ranges, and cutting there would
+                // give the right interval an empty [position, position[ range.
+                if (position > range.Start && position < range.End)
+                {
+                    right._ranges.Add(new LiveRange(position, range.End));
+
+                    _ranges[splitIndex++] = new LiveRange(range.Start, position);
+
+                    break;
+                }
+
+                if (range.Start >= position)
+                {
+                    break;
+                }
+            }
+
+            // Ranges from the split index onwards all belong to the right interval.
+            if (splitIndex < _ranges.Count)
+            {
+                int count = _ranges.Count - splitIndex;
+
+                right._ranges.AddRange(_ranges.GetRange(splitIndex, count));
+
+                _ranges.RemoveRange(splitIndex, count);
+            }
+
+            foreach (int usePosition in _usePositions.Where(x => x >= position))
+            {
+                right._usePositions.Add(usePosition);
+            }
+
+            _usePositions.RemoveWhere(x => x >= position);
+
+            Debug.Assert(_ranges.Count != 0, "Left interval is empty after split.");
+            Debug.Assert(right._ranges.Count != 0, "Right interval is empty after split.");
+
+            AddSplitChild(right);
+
+            return right;
+        }
+
+        private void AddSplitChild(LiveInterval child)
+        {
+            Debug.Assert(!child.IsEmpty, "Trying to insert a empty interval.");
+
+            _parent._childs.Add(child.GetStart(), child);
+        }
+
+        // Finds the interval live at the position among this one and the split childs, null if none.
+        public LiveInterval GetSplitChild(int position)
+        {
+            if (Overlaps(position))
+            {
+                return this;
+            }
+
+            foreach (LiveInterval splitChild in _childs.Values)
+            {
+                if (splitChild.Overlaps(position))
+                {
+                    return splitChild;
+                }
+            }
+
+            return null;
+        }
+
+        // Reuses the spill offset of the first spilled split child of the parent, if there is one.
+        public bool TrySpillWithSiblingOffset()
+        {
+            foreach (LiveInterval splitChild in _parent._childs.Values)
+            {
+                if (splitChild.IsSpilled)
+                {
+                    Spill(splitChild.SpillOffset);
+
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        public void Spill(int offset) => SpillOffset = offset;
+
+        // Orders by start position, with empty intervals before the non-empty ones.
+        public int CompareTo(LiveInterval other)
+        {
+            if (_ranges.Count == 0 || other._ranges.Count == 0)
+            {
+                return _ranges.Count.CompareTo(other._ranges.Count);
+            }
+
+            return _ranges[0].Start.CompareTo(other._ranges[0].Start);
+        }
+
+        public override string ToString() => string.Join("; ", _ranges);
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs b/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
new file mode 100644
index 000000000..b5faeffd5
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
@@ -0,0 +1,31 @@
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+    // Half-open range of positions: the start position is part of the
+    // range, while the end position is the first one that is not.
+    struct LiveRange : IComparable<LiveRange>
+    {
+        public int Start { get; }
+        public int End   { get; }
+
+        public LiveRange(int start, int end)
+        {
+            Start = start;
+            End   = end;
+        }
+
+        // Ranges that intersect compare as equal, which is what lets a binary
+        // search on a list of ranges be used to find an intersecting range.
+        // Ranges that do not intersect are ordered by their start position.
+        public int CompareTo(LiveRange other)
+        {
+            bool intersects = other.Start < End && Start < other.End;
+
+            return intersects ? 0 : Start.CompareTo(other.Start);
+        }
+
+        // Formats the range with the "[start, end[" half-open notation.
+        public override string ToString() => $"[{Start}, {End}[";
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
new file mode 100644
index 000000000..9652224e5
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
@@ -0,0 +1,47 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+    // Bundle of six register-mask values (available, caller saved and callee
+    // saved, each for integer and vector registers), fixed at construction.
+    // GetAvailableRegisters picks the "available" value for a register type.
+    struct RegisterMasks
+    {
+        public int IntAvailableRegisters   { get; }
+        public int VecAvailableRegisters   { get; }
+        public int IntCallerSavedRegisters { get; }
+        public int VecCallerSavedRegisters { get; }
+        public int IntCalleeSavedRegisters { get; }
+        public int VecCalleeSavedRegisters { get; }
+
+        public RegisterMasks(
+            int intAvailableRegisters,
+            int vecAvailableRegisters,
+            int intCallerSavedRegisters,
+            int vecCallerSavedRegisters,
+            int intCalleeSavedRegisters,
+            int vecCalleeSavedRegisters)
+        {
+            IntAvailableRegisters   = intAvailableRegisters;
+            IntCallerSavedRegisters = intCallerSavedRegisters;
+            IntCalleeSavedRegisters = intCalleeSavedRegisters;
+            VecAvailableRegisters   = vecAvailableRegisters;
+            VecCallerSavedRegisters = vecCallerSavedRegisters;
+            VecCalleeSavedRegisters = vecCalleeSavedRegisters;
+        }
+
+        // Returns the available-register value for Integer or Vector; any other
+        // type throws an ArgumentException whose message embeds that value.
+        public int GetAvailableRegisters(RegisterType type)
+        {
+            switch (type)
+            {
+                case RegisterType.Integer: return IntAvailableRegisters;
+                case RegisterType.Vector:  return VecAvailableRegisters;
+            }
+
+            throw new ArgumentException($"Invalid register type \"{type}\".");
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs b/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
new file mode 100644
index 000000000..a6233d6ee
--- /dev/null
+++ b/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
@@ -0,0 +1,27 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+    // Hands out consecutive offsets: each Allocate call returns the current
+    // running total and then grows it by the requested size.
+    class StackAllocator
+    {
+        private int _offset;
+
+        public int TotalSize => _offset;
+
+        public int Allocate(OperandType type) => Allocate(type.GetSizeInBytes());
+
+        // Returns the offset in effect before the call. sizeInBytes is added
+        // as-is: no alignment, sign or overflow check is made here.
+        public int Allocate(int sizeInBytes)
+        {
+            int allocated = _offset;
+            _offset = allocated + sizeInBytes;
+
+            return allocated;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs b/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
new file mode 100644
index 000000000..4955f1b4a
--- /dev/null
+++ b/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.CodeGen.Unwinding
+{
+    // Read-only holder for unwind data: push entries, prologue size and fixed
+    // allocation size. The pushEntries array is stored as passed (not copied).
+    struct UnwindInfo
+    {
+        public UnwindPushEntry[] PushEntries    { get; }
+        public int               PrologueSize   { get; }
+        public int               FixedAllocSize { get; }
+
+        public UnwindInfo(UnwindPushEntry[] pushEntries, int prologueSize, int fixedAllocSize)
+        {
+            PushEntries    = pushEntries;
+            PrologueSize   = prologueSize;
+            FixedAllocSize = fixedAllocSize;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs b/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
new file mode 100644
index 000000000..6597e2b4b
--- /dev/null
+++ b/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
@@ -0,0 +1,20 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.Unwinding
+{
+    // Read-only triple (Index, Type, StreamEndOffset). NOTE(review): presumably
+    // one pushed register and the code offset after its push - confirm at the producer.
+    struct UnwindPushEntry
+    {
+        public int          Index           { get; }
+        public RegisterType Type            { get; }
+        public int          StreamEndOffset { get; }
+
+        public UnwindPushEntry(int index, RegisterType type, int streamEndOffset)
+        {
+            Index           = index;
+            Type            = type;
+            StreamEndOffset = streamEndOffset;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs
new file mode 100644
index 000000000..c64838945
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/Assembler.cs
@@ -0,0 +1,1358 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Diagnostics;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.X86
+{
+    class Assembler
+    {
+        private const int BadOp       = 0;  // Placeholder the instruction table uses for an operand form that has no opcode.
+        private const int OpModRMBits = 24; // NOTE(review): presumably the shift of the ModRM /digit kept in the top byte of table opcodes (e.g. 0x04000083) - confirm.
+
+        private const byte RexPrefix  = 0x40; // x86-64 REX prefix byte with none of the W/R/X/B bits set.
+        private const byte RexWPrefix = 0x48; // REX prefix byte with the W bit (0x08) set.
+        private const byte LockPrefix = 0xf0; // x86 LOCK prefix byte.
+
+        [Flags]
+        private enum InstructionFlags // Encoding attributes attached to each instruction table row.
+        {
+            None     = 0,
+            RegOnly  = 1 << 0,
+            Reg8Src  = 1 << 1,
+            Reg8Dest = 1 << 2,
+            RexW     = 1 << 3,
+            Vex      = 1 << 4,
+
+            PrefixBit  = 16,             // Shift count of the 2-bit prefix field, not a flag; its value (16) is numerically equal to Vex (1 << 4).
+            PrefixMask = 3 << PrefixBit, // Mask covering the prefix field (bits 16-17).
+            Prefix66   = 1 << PrefixBit, // Field value 1: the 0x66 prefix.
+            PrefixF3   = 2 << PrefixBit, // Field value 2: the 0xF3 prefix.
+            PrefixF2   = 3 << PrefixBit  // Field value 3: the 0xF2 prefix (same bits as PrefixMask).
+        }
+
+        // One row of the instruction table: an opcode per operand form, in the
+        // table's column order (RM/R, RM/I8, RM/I32, R/I64, R/RM), plus flags.
+        // Rows pass BadOp for the forms they do not provide.
+        private struct InstructionInfo
+        {
+            public int OpRMR     { get; }
+            public int OpRMImm8  { get; }
+            public int OpRMImm32 { get; }
+            public int OpRImm64  { get; }
+            public int OpRRM     { get; }
+
+            public InstructionFlags Flags { get; }
+
+            // Plain copy of the arguments into the properties; nothing is validated.
+            public InstructionInfo(
+                int opRMR, int opRMImm8, int opRMImm32, int opRImm64, int opRRM,
+                InstructionFlags flags)
+            {
+                Flags     = flags;
+                OpRMR     = opRMR;
+                OpRMImm8  = opRMImm8;
+                OpRMImm32 = opRMImm32;
+                OpRImm64  = opRImm64;
+                OpRRM     = opRRM;
+            }
+        }
+
+        private static InstructionInfo[] _instTable; // Allocated with X86Instruction.Count entries and filled by the static constructor.
+
+        private Stream _stream; // NOTE(review): presumably the stream the encoded bytes are written to - confirm against its users.
+
+        static Assembler()
+        {
+            _instTable = new InstructionInfo[(int)X86Instruction.Count];
+
+            //  Name                                           RM/R        RM/I8       RM/I32      R/I64       R/RM        Flags
+            Add(X86Instruction.Add,        new InstructionInfo(0x00000001, 0x00000083, 0x00000081, BadOp,      0x00000003, InstructionFlags.None));
+            Add(X86Instruction.Addpd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Addps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex));
+            Add(X86Instruction.Addsd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Addss,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.And,        new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp,      0x00000023, InstructionFlags.None));
+            Add(X86Instruction.Andnpd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Andnps,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f55, InstructionFlags.Vex));
+            Add(X86Instruction.Bsr,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fbd, InstructionFlags.None));
+            Add(X86Instruction.Bswap,      new InstructionInfo(0x00000fc8, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.RegOnly));
+            Add(X86Instruction.Call,       new InstructionInfo(0x020000ff, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Cmovcc,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f40, InstructionFlags.None));
+            Add(X86Instruction.Cmp,        new InstructionInfo(0x00000039, 0x07000083, 0x07000081, BadOp,      0x0000003b, InstructionFlags.None));
+            Add(X86Instruction.Cmppd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Cmpps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc2, InstructionFlags.Vex));
+            Add(X86Instruction.Cmpsd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Cmpss,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.RexW));
+            Add(X86Instruction.Comisd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Comiss,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2f, InstructionFlags.Vex));
+            Add(X86Instruction.Cpuid,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fa2, InstructionFlags.RegOnly));
+            Add(X86Instruction.Cvtdq2pd,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Cvtdq2ps,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5b, InstructionFlags.Vex));
+            Add(X86Instruction.Cvtpd2dq,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Cvtpd2ps,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Cvtps2dq,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Cvtps2pd,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex));
+            Add(X86Instruction.Cvtsd2si,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Cvtsd2ss,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Cvtsi2sd,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Cvtsi2ss,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Cvtss2sd,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Div,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x060000f7, InstructionFlags.None));
+            Add(X86Instruction.Divpd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Divps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5e, InstructionFlags.Vex));
+            Add(X86Instruction.Divsd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Divss,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Haddpd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f7c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Haddps,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f7c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Idiv,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x070000f7, InstructionFlags.None));
+            Add(X86Instruction.Imul,       new InstructionInfo(BadOp,      0x0000006b, 0x00000069, BadOp,      0x00000faf, InstructionFlags.None));
+            Add(X86Instruction.Imul128,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x050000f7, InstructionFlags.None));
+            Add(X86Instruction.Insertps,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Lea,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x0000008d, InstructionFlags.None));
+            Add(X86Instruction.Maxpd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Maxps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5f, InstructionFlags.Vex));
+            Add(X86Instruction.Maxsd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Maxss,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Minpd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Minps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5d, InstructionFlags.Vex));
+            Add(X86Instruction.Minsd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Minss,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Mov,        new InstructionInfo(0x00000089, BadOp,      0x000000c7, 0x000000b8, 0x0000008b, InstructionFlags.None));
+            Add(X86Instruction.Mov16,      new InstructionInfo(0x00000089, BadOp,      0x000000c7, BadOp,      0x0000008b, InstructionFlags.Prefix66));
+            Add(X86Instruction.Mov8,       new InstructionInfo(0x00000088, 0x000000c6, BadOp,      BadOp,      0x0000008a, InstructionFlags.Reg8Src | InstructionFlags.Reg8Dest));
+            Add(X86Instruction.Movd,       new InstructionInfo(0x00000f7e, BadOp,      BadOp,      BadOp,      0x00000f6e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Movdqu,     new InstructionInfo(0x00000f7f, BadOp,      BadOp,      BadOp,      0x00000f6f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Movhlps,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f12, InstructionFlags.Vex));
+            Add(X86Instruction.Movlhps,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f16, InstructionFlags.Vex));
+            Add(X86Instruction.Movq,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f7e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Movsd,      new InstructionInfo(0x00000f11, BadOp,      BadOp,      BadOp,      0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Movss,      new InstructionInfo(0x00000f11, BadOp,      BadOp,      BadOp,      0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Movsx16,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fbf, InstructionFlags.None));
+            Add(X86Instruction.Movsx32,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000063, InstructionFlags.None));
+            Add(X86Instruction.Movsx8,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fbe, InstructionFlags.Reg8Src));
+            Add(X86Instruction.Movzx16,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fb7, InstructionFlags.None));
+            Add(X86Instruction.Movzx8,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fb6, InstructionFlags.Reg8Src));
+            Add(X86Instruction.Mul128,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x040000f7, InstructionFlags.None));
+            Add(X86Instruction.Mulpd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f59, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Mulps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f59, InstructionFlags.Vex));
+            Add(X86Instruction.Mulsd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Mulss,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Neg,        new InstructionInfo(0x030000f7, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Not,        new InstructionInfo(0x020000f7, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Or,         new InstructionInfo(0x00000009, 0x01000083, 0x01000081, BadOp,      0x0000000b, InstructionFlags.None));
+            Add(X86Instruction.Paddb,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Paddd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Paddq,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Paddw,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pand,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pandn,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pavgb,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pavgw,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pblendvb,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3810, InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpeqb,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpeqd,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpeqq,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpeqw,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f75, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpgtb,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f64, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpgtd,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f66, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpgtq,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3837, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pcmpgtw,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f65, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pextrb,     new InstructionInfo(0x000f3a14, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pextrd,     new InstructionInfo(0x000f3a16, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pextrq,     new InstructionInfo(0x000f3a16, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pextrw,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc5, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pinsrb,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a20, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pinsrd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a22, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pinsrq,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a22, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pinsrw,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc4, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmaxsb,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmaxsd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmaxsw,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fee, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmaxub,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fde, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmaxud,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmaxuw,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pminsb,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3838, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pminsd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3839, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pminsw,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fea, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pminub,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fda, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pminud,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pminuw,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f383a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmovsxbw,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3820, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmovsxdq,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3825, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmovsxwd,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3823, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmovzxbw,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3830, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmovzxdq,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3835, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmovzxwd,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3833, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmulld,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3840, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pmullw,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fd5, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pop,        new InstructionInfo(0x0000008f, BadOp,      BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Popcnt,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fb8, InstructionFlags.PrefixF3));
+            Add(X86Instruction.Por,        new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000feb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pshufb,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3800, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pshufd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f70, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pslld,      new InstructionInfo(BadOp,      0x06000f72, BadOp,      BadOp,      0x00000ff2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Pslldq,     new InstructionInfo(BadOp,      0x07000f73, BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psllq,      new InstructionInfo(BadOp,      0x06000f73, BadOp,      BadOp,      0x00000ff3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psllw,      new InstructionInfo(BadOp,      0x06000f71, BadOp,      BadOp,      0x00000ff1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psrad,      new InstructionInfo(BadOp,      0x04000f72, BadOp,      BadOp,      0x00000fe2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psraw,      new InstructionInfo(BadOp,      0x04000f71, BadOp,      BadOp,      0x00000fe1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psrld,      new InstructionInfo(BadOp,      0x02000f72, BadOp,      BadOp,      0x00000fd2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psrlq,      new InstructionInfo(BadOp,      0x02000f73, BadOp,      BadOp,      0x00000fd3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psrldq,     new InstructionInfo(BadOp,      0x03000f73, BadOp,      BadOp,      BadOp,      InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psrlw,      new InstructionInfo(BadOp,      0x02000f71, BadOp,      BadOp,      0x00000fd1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psubb,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ff8, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psubd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffa, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psubq,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ffb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Psubw,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000ff9, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpckhbw,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f68, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpckhdq,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f6a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpckhqdq, new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f6d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpckhwd,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f69, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpcklbw,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f60, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpckldq,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f62, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpcklqdq, new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f6c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Punpcklwd,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f61, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Push,       new InstructionInfo(BadOp,      0x0000006a, 0x00000068, BadOp,      0x060000ff, InstructionFlags.None));
+            Add(X86Instruction.Pxor,       new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fef, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Rcpps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f53, InstructionFlags.Vex));
+            Add(X86Instruction.Rcpss,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f53, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Ror,        new InstructionInfo(0x010000d3, 0x010000c1, BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Roundpd,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a09, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Roundps,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a08, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Roundsd,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a0b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Roundss,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a0a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Rsqrtps,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f52, InstructionFlags.Vex));
+            Add(X86Instruction.Rsqrtss,    new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Sar,        new InstructionInfo(0x070000d3, 0x070000c1, BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Setcc,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f90, InstructionFlags.Reg8Dest));
+            Add(X86Instruction.Shl,        new InstructionInfo(0x040000d3, 0x040000c1, BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Shr,        new InstructionInfo(0x050000d3, 0x050000c1, BadOp,      BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Shufpd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Shufps,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000fc6, InstructionFlags.Vex));
+            Add(X86Instruction.Sqrtpd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f51, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Sqrtps,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f51, InstructionFlags.Vex));
+            Add(X86Instruction.Sqrtsd,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Sqrtss,     new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Sub,        new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp,      0x0000002b, InstructionFlags.None));
+            Add(X86Instruction.Subpd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Subps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5c, InstructionFlags.Vex));
+            Add(X86Instruction.Subsd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+            Add(X86Instruction.Subss,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+            Add(X86Instruction.Test,       new InstructionInfo(0x00000085, BadOp,      0x000000f7, BadOp,      BadOp,      InstructionFlags.None));
+            Add(X86Instruction.Unpckhpd,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f15, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Unpckhps,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f15, InstructionFlags.Vex));
+            Add(X86Instruction.Unpcklpd,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Unpcklps,   new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f14, InstructionFlags.Vex));
+            Add(X86Instruction.Vpblendvb,  new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Xor,        new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp,      0x00000033, InstructionFlags.None));
+            Add(X86Instruction.Xorpd,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
+            Add(X86Instruction.Xorps,      new InstructionInfo(BadOp,      BadOp,      BadOp,      BadOp,      0x00000f57, InstructionFlags.Vex));
+        }
+
+        // Stores the encoding info for an instruction in _instTable, using the
+        // enum member's integer value as the slot index.
+        private static void Add(X86Instruction inst, InstructionInfo info)
+            => _instTable[(int)inst] = info;
+
+        // Creates an assembler that keeps the supplied stream in _stream
+        // (presumably the destination of the Write* helpers; confirm there).
+        public Assembler(Stream stream) => _stream = stream;
+
+        // Emits ADD for dest/source using the X86Instruction.Add table entry.
+        public void Add(Operand dest, Operand source, OperandType type)
+            => WriteInstruction(dest, source, type, X86Instruction.Add);
+
+        // Emits ADDSD with a destination and two sources (X86Instruction.Addsd entry).
+        public void Addsd(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Addsd);
+
+        // Emits ADDSS with a destination and two sources (X86Instruction.Addss entry).
+        public void Addss(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Addss);
+
+        // Emits AND for dest/source using the X86Instruction.And table entry.
+        public void And(Operand dest, Operand source, OperandType type)
+            => WriteInstruction(dest, source, type, X86Instruction.And);
+
+        // Emits BSR for dest/source using the X86Instruction.Bsr table entry.
+        public void Bsr(Operand dest, Operand source, OperandType type)
+            => WriteInstruction(dest, source, type, X86Instruction.Bsr);
+
+        // Emits BSWAP on dest. No source operand is passed and the operand
+        // type is taken from dest itself.
+        public void Bswap(Operand dest)
+            => WriteInstruction(dest, null, dest.Type, X86Instruction.Bswap);
+
+        // Emits CALL with dest as the only operand, using OperandType.None.
+        public void Call(Operand dest)
+            => WriteInstruction(dest, null, OperandType.None, X86Instruction.Call);
+
+        // Emits CDQ as the single byte 0x99.
+        public void Cdq() => WriteByte(0x99);
+
+        // Emits CMOVcc. The condition code is OR-ed into the OpRRM opcode of the
+        // Cmovcc table entry, and the instruction is written with rrm set.
+        public void Cmovcc(Operand dest, Operand source, OperandType type, X86Condition condition)
+        {
+            InstructionInfo info = _instTable[(int)X86Instruction.Cmovcc];
+
+            int opCode = info.OpRRM | (int)condition;
+
+            WriteOpCode(dest, null, source, type, info.Flags, opCode, rrm: true);
+        }
+
+        // Emits CMP for src1/src2 using the X86Instruction.Cmp table entry.
+        public void Cmp(Operand src1, Operand src2, OperandType type)
+            => WriteInstruction(src1, src2, type, X86Instruction.Cmp);
+
+        // Emits CQO as two raw bytes: 0x48 followed by 0x99.
+        public void Cqo()
+        {
+            // Same trailing byte as Cdq, with 0x48 written in front of it.
+            WriteByte(0x48);
+            WriteByte(0x99);
+        }
+
+        // Emits CMPXCHG16B on a memory operand, with the LockPrefix byte in front.
+        public void Cmpxchg16b(MemoryOperand memOp)
+        {
+            // The prefix byte must be written before anything WriteInstruction emits.
+            WriteByte(LockPrefix);
+            WriteInstruction(memOp, null, OperandType.None, X86Instruction.Cmpxchg16b);
+        }
+
+        // Emits COMISD. src1 is passed in the destination slot and no middle
+        // operand is supplied.
+        public void Comisd(Operand src1, Operand src2)
+            => WriteInstruction(src1, null, src2, X86Instruction.Comisd);
+
+        // Emits COMISS. src1 is passed in the destination slot and no middle
+        // operand is supplied.
+        public void Comiss(Operand src1, Operand src2)
+            => WriteInstruction(src1, null, src2, X86Instruction.Comiss);
+
+        // Emits CPUID; no operands are passed to the encoder.
+        public void Cpuid()
+            => WriteInstruction(null, null, OperandType.None, X86Instruction.Cpuid);
+
+        // Emits CVTSD2SS with a destination and two sources.
+        public void Cvtsd2ss(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss);
+
+        // Emits CVTSI2SD; the explicit operand type is forwarded to the encoder.
+        public void Cvtsi2sd(Operand dest, Operand src1, Operand src2, OperandType type)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2sd, type);
+
+        // Emits CVTSI2SS; the explicit operand type is forwarded to the encoder.
+        public void Cvtsi2ss(Operand dest, Operand src1, Operand src2, OperandType type)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2ss, type);
+
+        // Emits CVTSS2SD with a destination and two sources.
+        public void Cvtss2sd(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Cvtss2sd);
+
+        // Emits DIV on source. No destination operand is passed and the
+        // operand type is taken from source.
+        public void Div(Operand source)
+            => WriteInstruction(null, source, source.Type, X86Instruction.Div);
+
+        // Emits DIVSD with a destination and two sources.
+        public void Divsd(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Divsd);
+
+        // Emits DIVSS with a destination and two sources.
+        public void Divss(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Divss);
+
+        // Emits IDIV on source. No destination operand is passed and the
+        // operand type is taken from source.
+        public void Idiv(Operand source)
+            => WriteInstruction(null, source, source.Type, X86Instruction.Idiv);
+
+        // Single-operand IMUL, encoded through the X86Instruction.Imul128 entry.
+        // No destination operand is passed; the operand type comes from source.
+        public void Imul(Operand source)
+            => WriteInstruction(null, source, source.Type, X86Instruction.Imul128);
+
+        // Two-operand IMUL. Only a register source is accepted; any other operand
+        // kind is rejected with an ArgumentException before anything is written.
+        public void Imul(Operand dest, Operand source, OperandType type)
+        {
+            bool sourceIsRegister = source.Kind == OperandKind.Register;
+
+            if (sourceIsRegister)
+            {
+                WriteInstruction(dest, source, type, X86Instruction.Imul);
+
+                return;
+            }
+
+            throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
+        }
+
+        // IMUL with an immediate. src2 must be a constant; it is written as a
+        // single byte when IsImm8 accepts it and the table has an imm8 opcode,
+        // otherwise as a 32-bit value when IsImm32 accepts it and the table has an
+        // imm32 opcode. Throws ArgumentException when src2 is not a constant or
+        // neither form applies.
+        public void Imul(Operand dest, Operand src1, Operand src2, OperandType type)
+        {
+            InstructionInfo info = _instTable[(int)X86Instruction.Imul];
+
+            if (src2.Kind != OperandKind.Constant)
+            {
+                throw new ArgumentException($"Invalid source 2 operand kind \"{src2.Kind}\".");
+            }
+
+            bool encodeAsImm8 = IsImm8(src2.Value, src2.Type) && info.OpRMImm8 != BadOp;
+
+            if (encodeAsImm8)
+            {
+                WriteOpCode(dest, null, src1, type, info.Flags, info.OpRMImm8, rrm: true);
+                WriteByte(src2.AsByte());
+
+                return;
+            }
+
+            // Only consulted once the imm8 form has been ruled out.
+            bool encodeAsImm32 = IsImm32(src2.Value, src2.Type) && info.OpRMImm32 != BadOp;
+
+            if (!encodeAsImm32)
+            {
+                throw new ArgumentException($"Failed to encode constant 0x{src2.Value:X}.");
+            }
+
+            WriteOpCode(dest, null, src1, type, info.Flags, info.OpRMImm32, rrm: true);
+            WriteInt32(src2.AsInt32());
+        }
+
+        // Emits INSERTPS followed by its immediate byte, via the public
+        // three-operand-plus-immediate WriteInstruction overload.
+        public void Insertps(Operand dest, Operand src1, Operand src2, byte imm)
+            => WriteInstruction(X86Instruction.Insertps, dest, src1, src2, imm);
+
+        // Emits a conditional jump to a relative offset.
+        // Short form: (0x70 | condition) then the offset as one byte, used when
+        // ConstFitsOnS8 accepts the offset.
+        // Long form: 0x0f, (0x80 | condition) then the offset as a 32-bit value,
+        // used when ConstFitsOnS32 accepts it.
+        // Throws ArgumentOutOfRangeException when neither check passes.
+        public void Jcc(X86Condition condition, long offset)
+        {
+            int cc = (int)condition;
+
+            if (ConstFitsOnS8(offset))
+            {
+                WriteByte((byte)(0x70 | cc));
+                WriteByte((byte)offset);
+
+                return;
+            }
+
+            if (!ConstFitsOnS32(offset))
+            {
+                throw new ArgumentOutOfRangeException(nameof(offset));
+            }
+
+            WriteByte(0x0f);
+            WriteByte((byte)(0x80 | cc));
+            WriteInt32((int)offset);
+        }
+
+        // Emits an unconditional jump to a relative offset.
+        // Short form: 0xeb then the offset as one byte, used when ConstFitsOnS8
+        // accepts the offset.
+        // Long form: 0xe9 then the offset as a 32-bit value, used when
+        // ConstFitsOnS32 accepts it.
+        // Throws ArgumentOutOfRangeException when neither check passes.
+        public void Jmp(long offset)
+        {
+            if (ConstFitsOnS8(offset))
+            {
+                WriteByte(0xeb);
+                WriteByte((byte)offset);
+
+                return;
+            }
+
+            if (!ConstFitsOnS32(offset))
+            {
+                throw new ArgumentOutOfRangeException(nameof(offset));
+            }
+
+            WriteByte(0xe9);
+            WriteInt32((int)offset);
+        }
+
+        // Emits LEA for dest/source using the X86Instruction.Lea table entry.
+        public void Lea(Operand dest, Operand source, OperandType type)
+            => WriteInstruction(dest, source, type, X86Instruction.Lea);
+
+        // Emits MOV for dest/source using the X86Instruction.Mov table entry.
+        public void Mov(Operand dest, Operand source, OperandType type)
+            => WriteInstruction(dest, source, type, X86Instruction.Mov);
+
+        // Emits the Mov16 table entry for dest/source with OperandType.None.
+        public void Mov16(Operand dest, Operand source)
+            => WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov16);
+
+        // Emits the Mov8 table entry for dest/source with OperandType.None.
+        public void Mov8(Operand dest, Operand source)
+            => WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov8);
+
+        // Emits MOVD. When the source has an integer type or is a memory operand,
+        // the OpRRM opcode is used with rrm set; otherwise the OpRMR opcode is
+        // used with rrm clear.
+        public void Movd(Operand dest, Operand source)
+        {
+            InstructionInfo info = _instTable[(int)X86Instruction.Movd];
+
+            bool useRrmForm = source.Type.IsInteger() || source.Kind == OperandKind.Memory;
+
+            int opCode = useRrmForm ? info.OpRRM : info.OpRMR;
+
+            WriteOpCode(dest, null, source, OperandType.None, info.Flags, opCode, rrm: useRrmForm);
+        }
+
+        // Emits MOVDQU for dest/source; no middle operand is supplied.
+        public void Movdqu(Operand dest, Operand source)
+            => WriteInstruction(dest, null, source, X86Instruction.Movdqu);
+
+        // Emits MOVHLPS with a destination and two sources.
+        public void Movhlps(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Movhlps);
+
+        // Emits MOVLHPS with a destination and two sources.
+        public void Movlhps(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Movlhps);
+
+        // Emits MOVQ. When either side has an integer type or is a memory operand,
+        // the Movd table entry is reused with the RexW flag added: the source
+        // side is checked first (OpRRM, rrm set), then the destination side
+        // (OpRMR). Otherwise the dedicated Movq table entry is used.
+        public void Movq(Operand dest, Operand source)
+        {
+            InstructionInfo movdInfo = _instTable[(int)X86Instruction.Movd];
+
+            InstructionFlags wideFlags = movdInfo.Flags | InstructionFlags.RexW;
+
+            if (source.Type.IsInteger() || source.Kind == OperandKind.Memory)
+            {
+                WriteOpCode(dest, null, source, OperandType.None, wideFlags, movdInfo.OpRRM, rrm: true);
+
+                return;
+            }
+
+            if (dest.Type.IsInteger() || dest.Kind == OperandKind.Memory)
+            {
+                WriteOpCode(dest, null, source, OperandType.None, wideFlags, movdInfo.OpRMR);
+
+                return;
+            }
+
+            WriteInstruction(dest, source, OperandType.None, X86Instruction.Movq);
+        }
+
+        // Emits MOVSD with a destination and two sources.
+        public void Movsd(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Movsd);
+
+        // Emits MOVSS with a destination and two sources.
+        public void Movss(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Movss);
+
+        // Emits the Movsx16 table entry for dest/source with the given type.
+        public void Movsx16(Operand dest, Operand source, OperandType type)
+            => WriteInstruction(dest, source, type, X86Instruction.Movsx16);
+
+        // Emits the Movsx32 table entry for dest/source with the given type.
+        public void Movsx32(Operand dest, Operand source, OperandType type)
+            => WriteInstruction(dest, source, type, X86Instruction.Movsx32);
+
+        // Emits the Movsx8 table entry for dest/source with the given type.
+        public void Movsx8(Operand dest, Operand source, OperandType type)
+            => WriteInstruction(dest, source, type, X86Instruction.Movsx8);
+
+        // Emits the Movzx16 table entry for dest/source with the given type.
+        public void Movzx16(Operand dest, Operand source, OperandType type)
+            => WriteInstruction(dest, source, type, X86Instruction.Movzx16);
+
+        // Emits the Movzx8 table entry for dest/source with the given type.
+        public void Movzx8(Operand dest, Operand source, OperandType type)
+            => WriteInstruction(dest, source, type, X86Instruction.Movzx8);
+
+        // Single-operand MUL, encoded through the X86Instruction.Mul128 entry.
+        // No destination operand is passed; the operand type comes from source.
+        public void Mul(Operand source)
+            => WriteInstruction(null, source, source.Type, X86Instruction.Mul128);
+
+        // Emits MULSD with a destination and two sources.
+        public void Mulsd(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Mulsd);
+
+        // Emits MULSS with a destination and two sources.
+        public void Mulss(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Mulss);
+
+        // Emits NEG on dest. No source operand is passed and the operand type
+        // is taken from dest itself.
+        public void Neg(Operand dest)
+            => WriteInstruction(dest, null, dest.Type, X86Instruction.Neg);
+
+        // Emits NOT on dest. No source operand is passed and the operand type
+        // is taken from dest itself.
+        public void Not(Operand dest)
+            => WriteInstruction(dest, null, dest.Type, X86Instruction.Not);
+
+        // Emits OR for dest/source using the X86Instruction.Or table entry.
+        public void Or(Operand dest, Operand source, OperandType type)
+            => WriteInstruction(dest, source, type, X86Instruction.Or);
+
+        // Emits PCMPEQW with a destination and two sources.
+        public void Pcmpeqw(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Pcmpeqw);
+
+        // Emits PEXTRB for dest/source followed by its immediate byte, via the
+        // public two-operand-plus-immediate WriteInstruction overload.
+        public void Pextrb(Operand dest, Operand source, byte imm)
+            => WriteInstruction(X86Instruction.Pextrb, dest, source, imm);
+
+        // Emits PEXTRD for dest/source followed by its immediate byte, via the
+        // public two-operand-plus-immediate WriteInstruction overload.
+        public void Pextrd(Operand dest, Operand source, byte imm)
+            => WriteInstruction(X86Instruction.Pextrd, dest, source, imm);
+
+        // Emits PEXTRQ for dest/source followed by its immediate byte, via the
+        // public two-operand-plus-immediate WriteInstruction overload.
+        public void Pextrq(Operand dest, Operand source, byte imm)
+            => WriteInstruction(X86Instruction.Pextrq, dest, source, imm);
+
+        // Emits PEXTRW for dest/source followed by its immediate byte, via the
+        // public two-operand-plus-immediate WriteInstruction overload.
+        public void Pextrw(Operand dest, Operand source, byte imm)
+            => WriteInstruction(X86Instruction.Pextrw, dest, source, imm);
+
+        // Emits PINSRB with a destination and two sources, followed by its
+        // immediate byte, via the public three-operand-plus-immediate overload.
+        public void Pinsrb(Operand dest, Operand src1, Operand src2, byte imm)
+            => WriteInstruction(X86Instruction.Pinsrb, dest, src1, src2, imm);
+
+        // Emits PINSRD with a destination and two sources, followed by its
+        // immediate byte, via the public three-operand-plus-immediate overload.
+        public void Pinsrd(Operand dest, Operand src1, Operand src2, byte imm)
+            => WriteInstruction(X86Instruction.Pinsrd, dest, src1, src2, imm);
+
+        // Emits PINSRQ with a destination and two sources, followed by its
+        // immediate byte, via the public three-operand-plus-immediate overload.
+        public void Pinsrq(Operand dest, Operand src1, Operand src2, byte imm)
+            => WriteInstruction(X86Instruction.Pinsrq, dest, src1, src2, imm);
+
+        // Emits PINSRW with a destination and two sources, followed by its
+        // immediate byte, via the public three-operand-plus-immediate overload.
+        public void Pinsrw(Operand dest, Operand src1, Operand src2, byte imm)
+            => WriteInstruction(X86Instruction.Pinsrw, dest, src1, src2, imm);
+
+        // Emits POP. Register operands go through WriteCompactInst with base
+        // opcode 0x58; every other operand kind uses the X86Instruction.Pop
+        // table entry with the operand's own type.
+        public void Pop(Operand dest)
+        {
+            if (dest.Kind != OperandKind.Register)
+            {
+                WriteInstruction(dest, null, dest.Type, X86Instruction.Pop);
+
+                return;
+            }
+
+            WriteCompactInst(dest, 0x58);
+        }
+
+        // Emits POPCNT for dest/source using the X86Instruction.Popcnt table entry.
+        public void Popcnt(Operand dest, Operand source, OperandType type)
+            => WriteInstruction(dest, source, type, X86Instruction.Popcnt);
+
+        // Emits PSHUFD for dest/source followed by its immediate byte, via the
+        // public two-operand-plus-immediate WriteInstruction overload.
+        public void Pshufd(Operand dest, Operand source, byte imm)
+            => WriteInstruction(X86Instruction.Pshufd, dest, source, imm);
+
+        // Emits PUSH. Register operands go through WriteCompactInst with base
+        // opcode 0x50; every other operand kind uses the X86Instruction.Push
+        // table entry with the operand's own type.
+        public void Push(Operand source)
+        {
+            if (source.Kind != OperandKind.Register)
+            {
+                WriteInstruction(null, source, source.Type, X86Instruction.Push);
+
+                return;
+            }
+
+            WriteCompactInst(source, 0x50);
+        }
+
+        // Emits a return as the single byte 0xc3.
+        public void Return() => WriteByte(0xc3);
+
+        // Emits ROR through the shared shift helper.
+        public void Ror(Operand dest, Operand source, OperandType type)
+            => WriteShiftInst(dest, source, type, X86Instruction.Ror);
+
+        // Emits SAR through the shared shift helper.
+        public void Sar(Operand dest, Operand source, OperandType type)
+            => WriteShiftInst(dest, source, type, X86Instruction.Sar);
+
+        // Emits SHL through the shared shift helper.
+        public void Shl(Operand dest, Operand source, OperandType type)
+            => WriteShiftInst(dest, source, type, X86Instruction.Shl);
+
+        // Emits SHR through the shared shift helper.
+        public void Shr(Operand dest, Operand source, OperandType type)
+            => WriteShiftInst(dest, source, type, X86Instruction.Shr);
+
+        // Emits SETcc on dest. The condition code is OR-ed into the OpRRM opcode
+        // of the Setcc table entry; no source operands are passed.
+        public void Setcc(Operand dest, X86Condition condition)
+        {
+            InstructionInfo info = _instTable[(int)X86Instruction.Setcc];
+
+            int opCode = info.OpRRM | (int)condition;
+
+            WriteOpCode(dest, null, null, OperandType.None, info.Flags, opCode);
+        }
+
+        // Emits SUB for dest/source using the X86Instruction.Sub table entry.
+        public void Sub(Operand dest, Operand source, OperandType type)
+            => WriteInstruction(dest, source, type, X86Instruction.Sub);
+
+        // Emits SUBSD with a destination and two sources.
+        public void Subsd(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Subsd);
+
+        // Emits SUBSS with a destination and two sources.
+        public void Subss(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Subss);
+
+        // Emits TEST for src1/src2 using the X86Instruction.Test table entry.
+        public void Test(Operand src1, Operand src2, OperandType type)
+            => WriteInstruction(src1, src2, type, X86Instruction.Test);
+
+        // Emits XOR for dest/source using the X86Instruction.Xor table entry.
+        public void Xor(Operand dest, Operand source, OperandType type)
+            => WriteInstruction(dest, source, type, X86Instruction.Xor);
+
+        // Emits XORPS with a destination and two sources.
+        public void Xorps(Operand dest, Operand src1, Operand src2)
+            => WriteInstruction(dest, src1, src2, X86Instruction.Xorps);
+
+        // Generic two-operand entry point: forwards to the three-operand encoder
+        // with no middle operand, passing the optional operand type through.
+        public void WriteInstruction(
+            X86Instruction inst,
+            Operand dest,
+            Operand source,
+            OperandType type = OperandType.None)
+            => WriteInstruction(dest, null, source, inst, type);
+
+        // Generic three-operand entry point. When src2 is a constant, dest and
+        // src1 trade places before being handed to the encoder; otherwise the
+        // operands are forwarded in the order given.
+        public void WriteInstruction(X86Instruction inst, Operand dest, Operand src1, Operand src2)
+        {
+            bool hasConstantSource = src2.Kind == OperandKind.Constant;
+
+            Operand first  = hasConstantSource ? src1 : dest;
+            Operand second = hasConstantSource ? dest : src1;
+
+            WriteInstruction(first, second, src2, inst);
+        }
+
+        // Generic two-operand entry point with a trailing immediate byte. Handled
+        // by the three-operand-plus-immediate overload with no middle operand,
+        // which encodes the instruction and then writes imm.
+        public void WriteInstruction(X86Instruction inst, Operand dest, Operand source, byte imm)
+            => WriteInstruction(inst, dest, null, source, imm);
+
+        // Generic four-operand entry point. Encodes dest/src1/src2 and then
+        // writes one extra byte holding src3.AsByte() shifted left by four bits.
+        public void WriteInstruction(
+            X86Instruction inst,
+            Operand dest,
+            Operand src1,
+            Operand src2,
+            Operand src3)
+        {
+            // 3+ operands can only be encoded with the VEX encoding scheme.
+            Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
+
+            WriteInstruction(dest, src1, src2, inst);
+
+            // Evaluated only after the instruction itself has been written.
+            int trailingByte = src3.AsByte() << 4;
+
+            WriteByte((byte)trailingByte);
+        }
+
+        // Generic three-operand entry point with a trailing immediate byte:
+        // the instruction is encoded first, then imm is appended as-is.
+        public void WriteInstruction(
+            X86Instruction inst,
+            Operand dest,
+            Operand src1,
+            Operand src2,
+            byte imm)
+        {
+            // Instruction bytes first...
+            WriteInstruction(dest, src1, src2, inst);
+            // ...then the immediate.
+            WriteByte(imm);
+        }
+
+        // Shared encoder for the shift/rotate wrappers. A register shift count
+        // must be RCX (anything else throws ArgumentException) and is then
+        // dropped, so the instruction is encoded with a null source. Any other
+        // source kind is forwarded unchanged.
+        private void WriteShiftInst(Operand dest, Operand source, OperandType type, X86Instruction inst)
+        {
+            if (source.Kind != OperandKind.Register)
+            {
+                WriteInstruction(dest, source, type, inst);
+
+                return;
+            }
+
+            X86Register shiftReg = (X86Register)source.GetRegister().Index;
+
+            if (shiftReg != X86Register.Rcx)
+            {
+                throw new ArgumentException($"Invalid shift register \"{shiftReg}\".");
+            }
+
+            // The count register is not passed to the encoder.
+            WriteInstruction(dest, null, type, inst);
+        }
+
+        // Table-driven encoder for instructions with at most one source operand.
+        //
+        // The opcode is picked from the instruction's table entry by source shape:
+        //  - no source:       OpRRM (rrm set) if present, else OpRMR, else
+        //                     ArgumentNullException;
+        //  - non-constant:    OpRMR for a register source when present, else
+        //                     OpRRM (rrm set), else ArgumentException;
+        //  - constant:        Mov8 and Mov16 always use their fixed-size forms;
+        //                     otherwise imm8, then imm32, then the register +
+        //                     64-bit immediate form are tried in that order, and
+        //                     ArgumentException is thrown when none applies.
+        private void WriteInstruction(Operand dest, Operand source, OperandType type, X86Instruction inst)
+        {
+            InstructionInfo info = _instTable[(int)inst];
+
+            if (source == null)
+            {
+                if (info.OpRRM != BadOp)
+                {
+                    WriteOpCode(dest, null, source, type, info.Flags, info.OpRRM, rrm: true);
+                }
+                else if (info.OpRMR != BadOp)
+                {
+                    WriteOpCode(dest, null, source, type, info.Flags, info.OpRMR);
+                }
+                else
+                {
+                    throw new ArgumentNullException(nameof(source));
+                }
+
+                return;
+            }
+
+            if (source.Kind != OperandKind.Constant)
+            {
+                if (source.Kind == OperandKind.Register && info.OpRMR != BadOp)
+                {
+                    WriteOpCode(dest, null, source, type, info.Flags, info.OpRMR);
+                }
+                else if (info.OpRRM != BadOp)
+                {
+                    WriteOpCode(dest, null, source, type, info.Flags, info.OpRRM, rrm: true);
+                }
+                else
+                {
+                    throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
+                }
+
+                return;
+            }
+
+            // From here on the source is an immediate.
+            ulong imm = source.Value;
+
+            if (inst == X86Instruction.Mov8)
+            {
+                WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm8);
+                WriteByte((byte)imm);
+
+                return;
+            }
+
+            if (inst == X86Instruction.Mov16)
+            {
+                // Uses the imm32 opcode slot but writes only a 16-bit value.
+                WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm32);
+                WriteInt16((short)imm);
+
+                return;
+            }
+
+            if (IsImm8(imm, type) && info.OpRMImm8 != BadOp)
+            {
+                WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm8);
+                WriteByte((byte)imm);
+
+                return;
+            }
+
+            if (IsImm32(imm, type) && info.OpRMImm32 != BadOp)
+            {
+                WriteOpCode(dest, null, null, type, info.Flags, info.OpRMImm32);
+                WriteInt32((int)imm);
+
+                return;
+            }
+
+            bool canUseImm64 = dest != null && dest.Kind == OperandKind.Register && info.OpRImm64 != BadOp;
+
+            if (!canUseImm64)
+            {
+                throw new ArgumentException($"Failed to encode constant 0x{imm:X}.");
+            }
+
+            int rexPrefix = GetRexPrefix(dest, source, type, rrm: false);
+
+            if (rexPrefix != 0)
+            {
+                WriteByte((byte)rexPrefix);
+            }
+
+            // The low three bits of the register index are added to the opcode byte.
+            WriteByte((byte)(info.OpRImm64 + (dest.GetRegister().Index & 0b111)));
+            WriteUInt64(imm);
+        }
+
+        // Table-driven encoder for instructions with up to two source operands.
+        //
+        // The opcode is picked from the instruction's table entry by src2 shape:
+        //  - no src2:       OpRRM (rrm set) if present, else OpRMR, else
+        //                   ArgumentNullException;
+        //  - constant:      only values that survive a round trip through byte
+        //                   are accepted, and only when an imm8 opcode exists;
+        //                   src2 is then not passed to WriteOpCode and the value
+        //                   is appended as one byte. Otherwise ArgumentException;
+        //  - anything else: OpRMR for a register src2 when present, else OpRRM
+        //                   (rrm set), else ArgumentException.
+        private void WriteInstruction(
+            Operand dest,
+            Operand src1,
+            Operand src2,
+            X86Instruction inst,
+            OperandType type = OperandType.None)
+        {
+            InstructionInfo info = _instTable[(int)inst];
+
+            if (src2 == null)
+            {
+                if (info.OpRRM != BadOp)
+                {
+                    WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true);
+                }
+                else if (info.OpRMR != BadOp)
+                {
+                    WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR);
+                }
+                else
+                {
+                    throw new ArgumentNullException(nameof(src2));
+                }
+
+                return;
+            }
+
+            if (src2.Kind == OperandKind.Constant)
+            {
+                ulong imm = src2.Value;
+
+                bool fitsImm8Form = (byte)imm == imm && info.OpRMImm8 != BadOp;
+
+                if (!fitsImm8Form)
+                {
+                    throw new ArgumentException($"Failed to encode constant 0x{imm:X}.");
+                }
+
+                WriteOpCode(dest, src1, null, type, info.Flags, info.OpRMImm8);
+                WriteByte((byte)imm);
+
+                return;
+            }
+
+            if (src2.Kind == OperandKind.Register && info.OpRMR != BadOp)
+            {
+                WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR);
+
+                return;
+            }
+
+            if (info.OpRRM == BadOp)
+            {
+                throw new ArgumentException($"Invalid source operand kind \"{src2.Kind}\".");
+            }
+
+            WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true);
+        }
+
+        private void WriteOpCode(
+            Operand dest,
+            Operand src1,
+            Operand src2,
+            OperandType type,
+            InstructionFlags flags,
+            int opCode,
+            bool rrm = false)
+        {
+            int rexPrefix = GetRexPrefix(dest, src2, type, rrm);
+
+            if ((flags & InstructionFlags.RexW) != 0)
+            {
+                rexPrefix |= RexWPrefix;
+            }
+
+            int modRM = (opCode >> OpModRMBits) << 3;
+
+            MemoryOperand memOp = null;
+
+            if (dest != null)
+            {
+                if (dest.Kind == OperandKind.Register)
+                {
+                    int regIndex = dest.GetRegister().Index;
+
+                    modRM |= (regIndex & 0b111) << (rrm ? 3 : 0);
+
+                    if ((flags & InstructionFlags.Reg8Dest) != 0 && regIndex >= 4)
+                    {
+                        rexPrefix |= RexPrefix;
+                    }
+                }
+                else if (dest.Kind == OperandKind.Memory)
+                {
+                    memOp = dest as MemoryOperand;
+                }
+                else
+                {
+                    throw new ArgumentException("Invalid destination operand kind \"" + dest.Kind + "\".");
+                }
+            }
+
+            if (src2 != null)
+            {
+                if (src2.Kind == OperandKind.Register)
+                {
+                    int regIndex = src2.GetRegister().Index;
+
+                    modRM |= (regIndex & 0b111) << (rrm ? 0 : 3);
+
+                    if ((flags & InstructionFlags.Reg8Src) != 0 && regIndex >= 4)
+                    {
+                        rexPrefix |= RexPrefix;
+                    }
+                }
+                else if (src2.Kind == OperandKind.Memory && memOp == null)
+                {
+                    memOp = src2 as MemoryOperand;
+                }
+                else
+                {
+                    throw new ArgumentException("Invalid source operand kind \"" + src2.Kind + "\".");
+                }
+            }
+
+            bool needsSibByte      = false;
+            bool needsDisplacement = false;
+
+            int sib = 0;
+
+            if (memOp != null)
+            {
+                // Either source or destination is a memory operand.
+                Register baseReg = memOp.BaseAddress.GetRegister();
+
+                X86Register baseRegLow = (X86Register)(baseReg.Index & 0b111);
+
+                needsSibByte      = memOp.Index != null     || baseRegLow == X86Register.Rsp;
+                needsDisplacement = memOp.Displacement != 0 || baseRegLow == X86Register.Rbp;
+
+                if (needsDisplacement)
+                {
+                    if (ConstFitsOnS8(memOp.Displacement))
+                    {
+                        modRM |= 0x40;
+                    }
+                    else /* if (ConstFitsOnS32(memOp.Displacement)) */
+                    {
+                        modRM |= 0x80;
+                    }
+                }
+
+                if (baseReg.Index >= 8)
+                {
+                    rexPrefix |= RexPrefix | (baseReg.Index >> 3);
+                }
+
+                if (needsSibByte)
+                {
+                    sib = (int)baseRegLow;
+
+                    if (memOp.Index != null)
+                    {
+                        int indexReg = memOp.Index.GetRegister().Index;
+
+                        if (indexReg == (int)X86Register.Rsp)
+                        {
+                            throw new ArgumentException("Using RSP as index register on the memory operand is not allowed.");
+                        }
+
+                        if (indexReg >= 8)
+                        {
+                            rexPrefix |= RexPrefix | (indexReg >> 3) << 1;
+                        }
+
+                        sib |= (indexReg & 0b111) << 3;
+                    }
+                    else
+                    {
+                        sib |= 0b100 << 3;
+                    }
+
+                    sib |= (int)memOp.Scale << 6;
+
+                    modRM |= 0b100;
+                }
+                else
+                {
+                    modRM |= (int)baseRegLow;
+                }
+            }
+            else
+            {
+                // Source and destination are registers.
+                modRM |= 0xc0;
+            }
+
+            Debug.Assert(opCode != BadOp, "Invalid opcode value.");
+
+            if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
+            {
+                int vexByte2 = (int)(flags & InstructionFlags.PrefixMask) >> (int)InstructionFlags.PrefixBit;
+
+                if (src1 != null)
+                {
+                    vexByte2 |= (src1.GetRegister().Index ^ 0xf) << 3;
+                }
+                else
+                {
+                    vexByte2 |= 0b1111 << 3;
+                }
+
+                ushort opCodeHigh = (ushort)(opCode >> 8);
+
+                if ((rexPrefix & 0b1011) == 0 && opCodeHigh == 0xf)
+                {
+                    // Two-byte form.
+                    WriteByte(0xc5);
+
+                    vexByte2 |= (~rexPrefix & 4) << 5;
+
+                    WriteByte((byte)vexByte2);
+                }
+                else
+                {
+                    // Three-byte form.
+                    WriteByte(0xc4);
+
+                    int vexByte1 = (~rexPrefix & 7) << 5;
+
+                    switch (opCodeHigh)
+                    {
+                        case 0xf:   vexByte1 |= 1; break;
+                        case 0xf38: vexByte1 |= 2; break;
+                        case 0xf3a: vexByte1 |= 3; break;
+
+                        default: Debug.Assert(false, $"Failed to VEX encode opcode 0x{opCode:X}."); break;
+                    }
+
+                    vexByte2 |= (rexPrefix & 8) << 4;
+
+                    WriteByte((byte)vexByte1);
+                    WriteByte((byte)vexByte2);
+                }
+
+                opCode &= 0xff;
+            }
+            else
+            {
+                switch (flags & InstructionFlags.PrefixMask)
+                {
+                    case InstructionFlags.Prefix66: WriteByte(0x66); break;
+                    case InstructionFlags.PrefixF2: WriteByte(0xf2); break;
+                    case InstructionFlags.PrefixF3: WriteByte(0xf3); break;
+                }
+
+                if (rexPrefix != 0)
+                {
+                    WriteByte((byte)rexPrefix);
+                }
+            }
+
+            if (dest != null && (flags & InstructionFlags.RegOnly) != 0)
+            {
+                opCode += dest.GetRegister().Index & 7;
+            }
+
+            if ((opCode & 0xff0000) != 0)
+            {
+                WriteByte((byte)(opCode >> 16));
+            }
+
+            if ((opCode & 0xff00) != 0)
+            {
+                WriteByte((byte)(opCode >> 8));
+            }
+
+            WriteByte((byte)opCode);
+
+            if ((flags & InstructionFlags.RegOnly) == 0)
+            {
+                WriteByte((byte)modRM);
+
+                if (needsSibByte)
+                {
+                    WriteByte((byte)sib);
+                }
+
+                if (needsDisplacement)
+                {
+                    if (ConstFitsOnS8(memOp.Displacement))
+                    {
+                        WriteByte((byte)memOp.Displacement);
+                    }
+                    else /* if (ConstFitsOnS32(memOp.Displacement)) */
+                    {
+                        WriteInt32(memOp.Displacement);
+                    }
+                }
+            }
+        }
+
+        // Writes a single byte opcode that has the register index on its low 3 bits.
+        private void WriteCompactInst(Operand operand, int opCode)
+        {
+            int index = operand.GetRegister().Index;
+            // Registers with index 8 and above need the 0x41 prefix byte before the opcode.
+            if (index > 7)
+            {
+                WriteByte(0x41);
+            }
+            WriteByte((byte)(opCode + (index & 7)));
+        }
+
+        // Builds the REX prefix value for an instruction from its two operands.
+        // The result is 0 when the type is not 64-bit and no register index is 8 or above.
+        private static int GetRexPrefix(Operand dest, Operand source, OperandType type, bool rrm)
+        {
+            // Returns the prefix bits contributed by one operand, or 0 if it
+            // is missing, not a register, or a register with index below 8.
+            int HighBit(Operand operand, int bit)
+            {
+                if (operand == null || operand.Kind != OperandKind.Register)
+                {
+                    return 0;
+                }
+
+                int index = operand.GetRegister().Index;
+
+                return index >= 8 ? RexPrefix | (index >> 3) << bit : 0;
+            }
+
+            int rex = Is64Bits(type) ? RexWPrefix : 0;
+
+            // The rrm flag selects which operand gets bit 2 and which gets bit 0.
+            int destBit   = rrm ? 2 : 0;
+            int sourceBit = rrm ? 0 : 2;
+
+            rex |= HighBit(dest,   destBit);
+            rex |= HighBit(source, sourceBit);
+
+            return rex;
+        }
+
+        // Checks if the type is one of the 64-bit ones, which are I64 and FP64.
+        private static bool Is64Bits(OperandType type) =>
+            type == OperandType.FP64 ||
+            type == OperandType.I64;
+
+        // Checks if the immediate fits on a signed byte. For the I32 type, only
+        // the low 32 bits of the immediate are used, as a signed integer.
+        private static bool IsImm8(ulong immediate, OperandType type)
+        {
+            return ConstFitsOnS8(type != OperandType.I32 ? (long)immediate : (int)immediate);
+        }
+
+        // Checks if the immediate fits on a signed 32-bit integer. For the I32 type,
+        // only the low 32 bits of the immediate are used, as a signed integer.
+        private static bool IsImm32(ulong immediate, OperandType type)
+        {
+            return ConstFitsOnS32(type != OperandType.I32 ? (long)immediate : (int)immediate);
+        }
+
+        // Gets the length in bytes of the conditional jump needed for the offset: 2 when the
+        // offset fits on a signed byte, 6 when it fits on 32 bits. Backward (negative) offsets
+        // have the candidate length subtracted before the check. Throws if neither form fits.
+        public static int GetJccLength(long offset)
+        {
+            bool backward = offset < 0;
+
+            if (ConstFitsOnS8(backward ? offset - 2 : offset))
+            {
+                return 2;
+            }
+
+            return ConstFitsOnS32(backward ? offset - 6 : offset)
+                ? 6 : throw new ArgumentOutOfRangeException(nameof(offset));
+        }
+
+        // Gets the length in bytes of the unconditional jump needed for the offset: 2 when the
+        // offset fits on a signed byte, 5 when it fits on 32 bits. Backward (negative) offsets
+        // have the candidate length subtracted before the check. Throws if neither form fits.
+        public static int GetJmpLength(long offset)
+        {
+            bool backward = offset < 0;
+
+            if (ConstFitsOnS8(backward ? offset - 2 : offset))
+            {
+                return 2;
+            }
+
+            return ConstFitsOnS32(backward ? offset - 5 : offset)
+                ? 5 : throw new ArgumentOutOfRangeException(nameof(offset));
+        }
+
+        // Checks if the value is inside of the signed byte range (-128 to 127).
+        private static bool ConstFitsOnS8(long value) =>
+            value >= sbyte.MinValue &&
+            value <= sbyte.MaxValue;
+
+        // Checks if the value is inside of the signed 32-bit integer range.
+        private static bool ConstFitsOnS32(long value) =>
+            value >= int.MinValue &&
+            value <= int.MaxValue;
+
+        // Writes a signed 16-bit value to the stream.
+        // The bits are reinterpreted as unsigned, and WriteUInt16 does the actual write.
+        private void WriteInt16(short value) =>
+            WriteUInt16((ushort)value);
+
+        // Writes a signed 32-bit value to the stream.
+        // The bits are reinterpreted as unsigned, and WriteUInt32 does the actual write.
+        private void WriteInt32(int value) =>
+            WriteUInt32((uint)value);
+
+        // Writes a single byte to the output stream,
+        // at the current position of the stream.
+        private void WriteByte(byte value) =>
+            _stream.WriteByte(value);
+
+        private void WriteUInt16(ushort value)
+        {
+            WriteByte((byte)value);        // Bits 0 to 7, the low byte goes first (little endian).
+            WriteByte((byte)(value >> 8)); // Bits 8 to 15.
+        }
+
+        private void WriteUInt32(uint value)
+        {
+            for (int shift = 0; shift < 32; shift += 8)
+            {
+                WriteByte((byte)(value >> shift)); // 4 bytes in total, least significant first.
+            }
+        }
+
+        // Writes a 64-bit value to the stream as 8 bytes.
+        // The least significant byte is the first one written (little endian).
+        private void WriteUInt64(ulong value)
+        {
+            const int TotalBits = 64;
+
+            for (int shift = 0; shift < TotalBits; shift += 8)
+            {
+                WriteByte((byte)(value >> shift));
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/CallConvName.cs b/ARMeilleure/CodeGen/X86/CallConvName.cs
new file mode 100644
index 000000000..be3676282
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/CallConvName.cs
@@ -0,0 +1,8 @@
+namespace ARMeilleure.CodeGen.X86
+{
+    enum CallConvName // Names the calling conventions known to the x86 code generator.
+    {
+        SystemV, // Returned by CallingConvention.GetCurrentCallConv when the OS is not Windows.
+        Windows  // Returned by CallingConvention.GetCurrentCallConv when the OS is Windows.
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/CallingConvention.cs b/ARMeilleure/CodeGen/X86/CallingConvention.cs
new file mode 100644
index 000000000..2769fd93e
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/CallingConvention.cs
@@ -0,0 +1,159 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.CodeGen.X86
+{
+    static class CallingConvention
+    {
+        private const int RegistersMask = 0xffff;
+
+        // Gets the mask of the available integer registers. Each bit is a
+        // register index, and all the 16 bits are set except for the RSP one.
+        public static int GetIntAvailableRegisters() =>
+            ~(1 << (int)X86Register.Rsp) & RegistersMask;
+
+        // Gets the mask of the available vector registers.
+        // Every one of the 16 bits is set here, unlike on the integer
+        // mask, where the bit of the stack pointer register is cleared.
+        public static int GetVecAvailableRegisters() => RegistersMask;
+
+        // Gets the mask of the integer registers that are caller saved, meaning
+        // that a called function is free to modify them without restoring the old value.
+        // RSI and RDI are only on the mask when the convention is not the Windows one.
+        public static int GetIntCallerSavedRegisters()
+        {
+            bool isWindows = GetCurrentCallConv() == CallConvName.Windows;
+
+            // Registers that are on the mask for both of the conventions.
+            int mask = (1 << (int)X86Register.Rax) |
+                       (1 << (int)X86Register.Rcx) |
+                       (1 << (int)X86Register.Rdx) |
+                       (1 << (int)X86Register.R8)  |
+                       (1 << (int)X86Register.R9)  |
+                       (1 << (int)X86Register.R10) |
+                       (1 << (int)X86Register.R11);
+
+            if (!isWindows)
+            {
+                // Registers that are only on the System V mask.
+                mask |= 1 << (int)X86Register.Rsi;
+                mask |= 1 << (int)X86Register.Rdi;
+            }
+
+            return mask;
+        }
+
+        // Gets the mask of the vector registers that are caller saved.
+        // That is XMM0 to XMM5 on Windows, and all the 16 registers otherwise.
+        public static int GetVecCallerSavedRegisters()
+        {
+            if (GetCurrentCallConv() != CallConvName.Windows)
+            {
+                return RegistersMask;
+            }
+
+            return (1 << (int)X86Register.Xmm0) |
+                   (1 << (int)X86Register.Xmm1) |
+                   (1 << (int)X86Register.Xmm2) |
+                   (1 << (int)X86Register.Xmm3) |
+                   (1 << (int)X86Register.Xmm4) |
+                   (1 << (int)X86Register.Xmm5);
+        }
+
+        // Gets the mask of the integer registers that are callee saved.
+        // It is the caller saved mask with its 16 low bits flipped.
+        public static int GetIntCalleeSavedRegisters() =>
+            RegistersMask ^ GetIntCallerSavedRegisters();
+
+        // Gets the mask of the vector registers that are callee saved.
+        // It is the caller saved mask with its 16 low bits flipped.
+        public static int GetVecCalleeSavedRegisters() =>
+            RegistersMask ^ GetVecCallerSavedRegisters();
+
+        // Gets the total amount of arguments that are passed on registers.
+        // NOTE(review): 4 is the amount of Windows cases on GetIntArgumentRegister,
+        // so this is presumably for the Windows convention. Confirm on the callers.
+        public static int GetArgumentsOnRegsCount() => 4;
+
+        // Gets the amount of integer arguments that are passed on registers.
+        // NOTE(review): 6 is the amount of non-Windows cases on GetIntArgumentRegister,
+        // so this is presumably for the System V convention. Confirm on the callers.
+        public static int GetIntArgumentsOnRegsCount() => 6;
+
+        // Gets the amount of vector arguments that are passed on registers.
+        // NOTE(review): 8 is the non-Windows register count on GetVecArgumentRegister,
+        // so this is presumably for the System V convention. Confirm on the callers.
+        public static int GetVecArgumentsOnRegsCount() => 8;
+
+        // Gets the register used to pass the integer argument at the given index.
+        // Throws if the index is not one of the 4 (Windows) or 6 (otherwise) supported ones.
+        public static X86Register GetIntArgumentRegister(int index)
+        {
+            bool isWindows = GetCurrentCallConv() == CallConvName.Windows;
+
+            // The first register of each case is the Windows one.
+            switch (index)
+            {
+                case 0: return isWindows ? X86Register.Rcx : X86Register.Rdi;
+                case 1: return isWindows ? X86Register.Rdx : X86Register.Rsi;
+                case 2: return isWindows ? X86Register.R8  : X86Register.Rdx;
+                case 3: return isWindows ? X86Register.R9  : X86Register.Rcx;
+            }
+
+            // The arguments 4 and 5 are only passed on registers outside of Windows.
+            if (!isWindows)
+            {
+                switch (index)
+                {
+                    case 4: return X86Register.R8;
+                    case 5: return X86Register.R9;
+                }
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(index));
+        }
+
+        // Gets the register used to pass the vector argument at the given index.
+        // The first one is XMM0, and there are 4 of them on Windows, or 8 otherwise.
+        // Throws if the index is negative, or not lower than the amount of registers.
+        public static X86Register GetVecArgumentRegister(int index)
+        {
+            bool isWindows = GetCurrentCallConv() == CallConvName.Windows;
+
+            int count = isWindows ? 4 : 8;
+
+            bool isValid = index >= 0 && index < count;
+
+            if (!isValid)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            // The register is found by adding the index to XMM0, so this
+            // relies on the XMM registers being sequential on the enum.
+            return X86Register.Xmm0 + index;
+        }
+
+        // Gets the register where functions return integer values.
+        // It is RAX no matter what the current calling convention is.
+        public static X86Register GetIntReturnRegister() =>
+            X86Register.Rax;
+
+        // Gets the second integer return register, which is always RDX.
+        // NOTE(review): presumably holds the high half of values too wide for RAX, confirm.
+        public static X86Register GetIntReturnRegisterHigh() =>
+            X86Register.Rdx;
+
+        // Gets the register where functions return vector values.
+        // It is XMM0 no matter what the current calling convention is.
+        public static X86Register GetVecReturnRegister() =>
+            X86Register.Xmm0;
+
+        // Picks the calling convention from the OS that the process is running on.
+        public static CallConvName GetCurrentCallConv()
+        {
+            bool isWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
+            return isWindows ? CallConvName.Windows : CallConvName.SystemV;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/CodeGenContext.cs b/ARMeilleure/CodeGen/X86/CodeGenContext.cs
new file mode 100644
index 000000000..d719b5164
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/CodeGenContext.cs
@@ -0,0 +1,305 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.X86
+{
+    class CodeGenContext
+    {
+        private const int ReservedBytesForJump = 1;
+
+        private Stream _stream;
+
+        public int StreamOffset => (int)_stream.Length;
+
+        public AllocationResult AllocResult { get; }
+
+        public Assembler Assembler { get; }
+
+        public BasicBlock CurrBlock { get; private set; }
+
+        public int CallArgsRegionSize { get; }
+        public int XmmSaveRegionSize  { get; }
+
+        private long[] _blockOffsets;
+
+        // A jump to a basic block that was requested, but not written to the code yet.
+        private struct Jump
+        {
+            // True for conditional jumps (Jcc), false for unconditional ones (Jmp).
+            public bool IsConditional { get; }
+            // Condition of the jump, it is always 0 on unconditional jumps.
+            public X86Condition Condition { get; }
+            // Block that the jump goes to.
+            public BasicBlock Target { get; }
+            // Position on the stream where the jump placeholder starts.
+            public long JumpPosition { get; }
+            // Offset and size in bytes of the jump instruction, both calculated on GetCode.
+            public long RelativeOffset { get; set; }
+            public int  InstSize       { get; set; }
+
+            // Creates an unconditional jump.
+            public Jump(BasicBlock target, long jumpPosition)
+                : this(false, 0, target, jumpPosition)
+            {
+            }
+
+            // Creates a conditional jump.
+            public Jump(X86Condition condition, BasicBlock target, long jumpPosition)
+                : this(true, condition, target, jumpPosition)
+            {
+            }
+
+            // Sets the jump data, with the offset and size starting as zero.
+            private Jump(bool isConditional, X86Condition condition, BasicBlock target, long jumpPosition)
+            {
+                IsConditional  = isConditional;
+                Condition      = condition;
+                Target         = target;
+                JumpPosition   = jumpPosition;
+                RelativeOffset = 0;
+                InstSize       = 0;
+            }
+        }
+
+        private List<Jump> _jumps;
+
+        private X86Condition _jNearCondition;
+
+        private long _jNearPosition;
+        private int  _jNearLength;
+
+        // Creates the context used to generate code for a function into the stream.
+        // The blocksCount is the size of the table with the start offset of each block.
+        public CodeGenContext(Stream stream, AllocationResult allocResult, int maxCallArgs, int blocksCount)
+        {
+            _stream       = stream;
+            _jumps        = new List<Jump>();
+            _blockOffsets = new long[blocksCount];
+
+            AllocResult = allocResult;
+            Assembler   = new Assembler(stream);
+
+            // Both region sizes come from the same calculation.
+            CallArgsRegionSize = GetCallArgsRegionSize(allocResult, maxCallArgs, out int xmmSaveSize);
+            XmmSaveRegionSize  = xmmSaveSize;
+        }
+
+        // Calculates the size of the stack region for the arguments of called functions,
+        // including the padding that keeps the whole frame size a multiple of 16 bytes.
+        // The size of the region used to save XMM registers is returned on xmmSaveRegionSize.
+        private int GetCallArgsRegionSize(AllocationResult allocResult, int maxCallArgs, out int xmmSaveRegionSize)
+        {
+            // Only callee saved registers that the function actually uses are counted.
+            int usedIntMask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters;
+            int usedVecMask = CallingConvention.GetVecCalleeSavedRegisters() & allocResult.VecUsedRegisters;
+
+            // Each integer register takes 8 bytes on the stack, and each vector register 16.
+            int intSaveRegionSize = BitUtils.CountBits(usedIntMask) * 8;
+
+            xmmSaveRegionSize = BitUtils.CountBits(usedVecMask) * 16;
+
+            // We need to add 8 bytes to the total size, as the call to this
+            // function already pushed 8 bytes (the return address).
+            int frameSize = intSaveRegionSize + xmmSaveRegionSize + 8 + allocResult.SpillRegionSize;
+
+            // When the function has no calls, maxCallArgs is -1, and we don't need
+            // to allocate the shadow space. Otherwise, the ABI mandates that the space
+            // for at least 4 arguments is reserved on the stack (this is called shadow space).
+            int argsCount = maxCallArgs;
+
+            if (argsCount < 4)
+            {
+                argsCount = argsCount < 0 ? 0 : 4;
+            }
+
+            // TODO: Instead of always multiplying by 16 (the largest possible size of a variable,
+            // since a V128 has 16 bytes), we should calculate the exact size consumed by the
+            // arguments passed to the called functions on the stack.
+            int totalSize = frameSize + argsCount * 16;
+
+            // Ensure that the Stack Pointer will be aligned to 16 bytes.
+            int alignedTotalSize = (totalSize + 0xf) & ~0xf;
+
+            return alignedTotalSize - frameSize;
+        }
+
+        // Makes the block the current one, with its code starting at the current stream position.
+        public void EnterBlock(BasicBlock block)
+        {
+            _blockOffsets[block.Index] = _stream.Position;
+            CurrBlock                  = block;
+        }
+
+        // Queues an unconditional jump to the target block, and writes a placeholder for it.
+        public void JumpTo(BasicBlock target)
+        {
+            _jumps.Add(new Jump(target, jumpPosition: _stream.Position));
+            WritePadding(size: ReservedBytesForJump);
+        }
+
+        // Queues a conditional jump to the target block, and writes a placeholder for it.
+        public void JumpTo(X86Condition condition, BasicBlock target)
+        {
+            _jumps.Add(new Jump(condition, target, jumpPosition: _stream.Position));
+            WritePadding(size: ReservedBytesForJump);
+        }
+
+        // Starts a conditional jump in the short form, with the target set later by JumpHere.
+        public void JumpToNear(X86Condition condition)
+        {
+            _jNearPosition  = _stream.Position;
+            _jNearLength    = Assembler.GetJccLength(offset: 0);
+            _jNearCondition = condition;
+            _stream.Seek(_jNearLength, SeekOrigin.Current); // Skips the bytes, nothing is written.
+        }
+
+        // Writes the jump started by JumpToNear, now that the target is known to be the
+        // current position. The stream position is the same before and after the call.
+        public void JumpHere()
+        {
+            long targetPosition = _stream.Position;
+            long offset         = targetPosition - (_jNearPosition + _jNearLength);
+
+            Debug.Assert(_jNearLength == Assembler.GetJccLength(offset), "Relative offset doesn't fit on near jump.");
+
+            // Go back to the reserved bytes, write the jump there, and then return.
+            _stream.Seek(_jNearPosition, SeekOrigin.Begin);
+            Assembler.Jcc(_jNearCondition, offset);
+            _stream.Seek(targetPosition, SeekOrigin.Begin);
+        }
+
+        private void WritePadding(int size)
+        {
+            for (int written = 0; written < size; written++)
+            {
+                _stream.WriteByte(0); // The padding is made of zero bytes.
+            }
+        }
+
+        // Gets the final code, where each placeholder written by JumpTo is replaced by a jump
+        // instruction, using the shortest form that is able to reach the target block.
+        public byte[] GetCode()
+        {
+            // Copies count bytes from the current position of the working stream into the
+            // destination. A single Read call may return less bytes than requested, so loop.
+            void CopyFromStream(Stream destination, long count)
+            {
+                byte[] buffer = new byte[count];
+                int    filled = 0;
+
+                while (filled < buffer.Length)
+                {
+                    int read = _stream.Read(buffer, filled, buffer.Length - filled);
+
+                    if (read <= 0)
+                    {
+                        throw new EndOfStreamException();
+                    }
+                    filled += read;
+                }
+
+                destination.Write(buffer, 0, buffer.Length);
+            }
+
+            // Calculate the jump relative offsets. The size of a jump changes the position of
+            // the code after it, and with that other offsets, so repeat until nothing changes.
+            bool modified;
+
+            do
+            {
+                modified = false;
+
+                for (int index = 0; index < _jumps.Count; index++)
+                {
+                    Jump jump       = _jumps[index];
+                    long jumpTarget = _blockOffsets[jump.Target.Index];
+                    long offset     = jumpTarget - jump.JumpPosition;
+
+                    if (offset < 0)
+                    {
+                        // Backward jump, account for the jumps between the target and this one.
+                        for (int index2 = index - 1; index2 >= 0; index2--)
+                        {
+                            Jump jump2 = _jumps[index2];
+                            if (jump2.JumpPosition < jumpTarget)
+                            {
+                                break;
+                            }
+                            offset -= jump2.InstSize - ReservedBytesForJump;
+                        }
+                    }
+                    else
+                    {
+                        // Forward jump, account for the jumps between this one and the target.
+                        for (int index2 = index + 1; index2 < _jumps.Count; index2++)
+                        {
+                            Jump jump2 = _jumps[index2];
+                            if (jump2.JumpPosition >= jumpTarget)
+                            {
+                                break;
+                            }
+                            offset += jump2.InstSize - ReservedBytesForJump;
+                        }
+
+                        offset -= ReservedBytesForJump;
+                    }
+
+                    jump.InstSize = jump.IsConditional
+                        ? Assembler.GetJccLength(offset)
+                        : Assembler.GetJmpLength(offset);
+
+                    // The jump is relative to the next instruction, not the current one.
+                    // Since we didn't know the next instruction address when calculating
+                    // the offset (as the size of the current jump instruction was not known),
+                    // we now need to compensate the offset with the jump instruction size.
+                    // It's also worth to note that:
+                    // - This is only needed for backward jumps.
+                    // - The GetJmpLength and GetJccLength also compensates the offset
+                    // internally when computing the jump instruction size.
+                    if (offset < 0)
+                    {
+                        offset -= jump.InstSize;
+                    }
+
+                    modified |= jump.RelativeOffset != offset;
+                    jump.RelativeOffset = offset;
+
+                    // Jump is a struct, the modified copy needs to be stored back on the list.
+                    _jumps[index] = jump;
+                }
+            }
+            while (modified);
+
+            // Write the code, ignoring the dummy bytes after jumps, into a new stream.
+            _stream.Seek(0, SeekOrigin.Begin);
+
+            using (MemoryStream codeStream = new MemoryStream())
+            {
+                Assembler assembler = new Assembler(codeStream);
+
+                foreach (Jump jump in _jumps)
+                {
+                    CopyFromStream(codeStream, jump.JumpPosition - _stream.Position);
+                    _stream.Seek(ReservedBytesForJump, SeekOrigin.Current);
+
+                    if (jump.IsConditional)
+                    {
+                        assembler.Jcc(jump.Condition, jump.RelativeOffset);
+                    }
+                    else
+                    {
+                        assembler.Jmp(jump.RelativeOffset);
+                    }
+                }
+
+                CopyFromStream(codeStream, _stream.Length - _stream.Position);
+
+                return codeStream.ToArray();
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
new file mode 100644
index 000000000..ae24b5631
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
@@ -0,0 +1,1661 @@
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Common;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.X86
+{
+    static class CodeGenerator
+    {
+        private const int PageSize       = 0x1000;
+        private const int StackGuardSize = 0x2000;
+
+        private static readonly Action<CodeGenContext, Operation>[] _instTable; // Generators indexed by (int)Instruction; assigned once by the static constructor.
+
+        static CodeGenerator()
+        {
+            _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))];
+            // Register one generator per IR instruction; slots left null make GenerateOperation throw.
+            Add(Instruction.Add,                     GenerateAdd);
+            Add(Instruction.BitwiseAnd,              GenerateBitwiseAnd);
+            Add(Instruction.BitwiseExclusiveOr,      GenerateBitwiseExclusiveOr);
+            Add(Instruction.BitwiseNot,              GenerateBitwiseNot);
+            Add(Instruction.BitwiseOr,               GenerateBitwiseOr);
+            Add(Instruction.Branch,                  GenerateBranch);
+            Add(Instruction.BranchIfFalse,           GenerateBranchIfFalse);
+            Add(Instruction.BranchIfTrue,            GenerateBranchIfTrue);
+            Add(Instruction.ByteSwap,                GenerateByteSwap);
+            Add(Instruction.Call,                    GenerateCall);
+            Add(Instruction.Clobber,                 GenerateClobber);
+            Add(Instruction.CompareAndSwap128,       GenerateCompareAndSwap128);
+            Add(Instruction.CompareEqual,            GenerateCompareEqual);
+            Add(Instruction.CompareGreater,          GenerateCompareGreater);
+            Add(Instruction.CompareGreaterOrEqual,   GenerateCompareGreaterOrEqual);
+            Add(Instruction.CompareGreaterOrEqualUI, GenerateCompareGreaterOrEqualUI);
+            Add(Instruction.CompareGreaterUI,        GenerateCompareGreaterUI);
+            Add(Instruction.CompareLess,             GenerateCompareLess);
+            Add(Instruction.CompareLessOrEqual,      GenerateCompareLessOrEqual);
+            Add(Instruction.CompareLessOrEqualUI,    GenerateCompareLessOrEqualUI);
+            Add(Instruction.CompareLessUI,           GenerateCompareLessUI);
+            Add(Instruction.CompareNotEqual,         GenerateCompareNotEqual);
+            Add(Instruction.ConditionalSelect,       GenerateConditionalSelect);
+            Add(Instruction.ConvertI64ToI32,         GenerateConvertI64ToI32);
+            Add(Instruction.ConvertToFP,             GenerateConvertToFP);
+            Add(Instruction.Copy,                    GenerateCopy);
+            Add(Instruction.CountLeadingZeros,       GenerateCountLeadingZeros);
+            Add(Instruction.CpuId,                   GenerateCpuId);
+            Add(Instruction.Divide,                  GenerateDivide);
+            Add(Instruction.DivideUI,                GenerateDivideUI);
+            Add(Instruction.Fill,                    GenerateFill);
+            Add(Instruction.Load,                    GenerateLoad);
+            Add(Instruction.Load16,                  GenerateLoad16);
+            Add(Instruction.Load8,                   GenerateLoad8);
+            Add(Instruction.Multiply,                GenerateMultiply);
+            Add(Instruction.Multiply64HighSI,        GenerateMultiply64HighSI);
+            Add(Instruction.Multiply64HighUI,        GenerateMultiply64HighUI);
+            Add(Instruction.Negate,                  GenerateNegate);
+            Add(Instruction.Return,                  GenerateReturn);
+            Add(Instruction.RotateRight,             GenerateRotateRight);
+            Add(Instruction.ShiftLeft,               GenerateShiftLeft);
+            Add(Instruction.ShiftRightSI,            GenerateShiftRightSI);
+            Add(Instruction.ShiftRightUI,            GenerateShiftRightUI);
+            Add(Instruction.SignExtend16,            GenerateSignExtend16);
+            Add(Instruction.SignExtend32,            GenerateSignExtend32);
+            Add(Instruction.SignExtend8,             GenerateSignExtend8);
+            Add(Instruction.Spill,                   GenerateSpill);
+            Add(Instruction.SpillArg,                GenerateSpillArg);
+            Add(Instruction.StackAlloc,              GenerateStackAlloc);
+            Add(Instruction.Store,                   GenerateStore);
+            Add(Instruction.Store16,                 GenerateStore16);
+            Add(Instruction.Store8,                  GenerateStore8);
+            Add(Instruction.Subtract,                GenerateSubtract);
+            Add(Instruction.VectorCreateScalar,      GenerateVectorCreateScalar);
+            Add(Instruction.VectorExtract,           GenerateVectorExtract);
+            Add(Instruction.VectorExtract16,         GenerateVectorExtract16);
+            Add(Instruction.VectorExtract8,          GenerateVectorExtract8);
+            Add(Instruction.VectorInsert,            GenerateVectorInsert);
+            Add(Instruction.VectorInsert16,          GenerateVectorInsert16);
+            Add(Instruction.VectorInsert8,           GenerateVectorInsert8);
+            Add(Instruction.VectorOne,               GenerateVectorOne);
+            Add(Instruction.VectorZero,              GenerateVectorZero);
+            Add(Instruction.VectorZeroUpper64,       GenerateVectorZeroUpper64);
+            Add(Instruction.VectorZeroUpper96,       GenerateVectorZeroUpper96);
+            Add(Instruction.ZeroExtend16,            GenerateZeroExtend16);
+            Add(Instruction.ZeroExtend32,            GenerateZeroExtend32);
+            Add(Instruction.ZeroExtend8,             GenerateZeroExtend8);
+        }
+
+        // Registers func as the generator for inst. The slot index is the enum's integer
+        // value, so registering the same instruction twice keeps only the last generator.
+        private static void Add(Instruction inst, Action<CodeGenContext, Operation> func)
+            => _instTable[(int)inst] = func;
+
+        // Compiles the function described by cctx: runs the optimization, pre-allocation
+        // and register allocation passes over its CFG, then emits the machine code and
+        // returns it together with the unwind info produced while writing the prologue.
+        public static CompiledFunction Generate(CompilerContext cctx)
+        {
+            // Options are queried on demand, at the same points of the pipeline as each check below.
+            bool IsEnabled(CompilerOptions flag) => (cctx.Options & flag) != 0;
+
+            ControlFlowGraph graph = cctx.Cfg;
+
+            // Optimization pass: runs only when both the SsaForm and Optimize options are set.
+            Logger.StartPass(PassName.Optimization);
+
+            if (IsEnabled(CompilerOptions.SsaForm) && IsEnabled(CompilerOptions.Optimize))
+            {
+                Optimizer.RunPass(graph);
+            }
+
+            Logger.EndPass(PassName.Optimization, graph);
+
+            // Pre-allocation pass: also yields maxCallArgs, which the code gen context receives below.
+            Logger.StartPass(PassName.PreAllocation);
+            StackAllocator stackAllocator = new StackAllocator();
+            PreAllocator.RunPass(cctx, stackAllocator, out int maxCallArgs);
+            Logger.EndPass(PassName.PreAllocation, graph);
+
+            // Register allocation pass: SSA form, when in use, is deconstructed first.
+            Logger.StartPass(PassName.RegisterAllocation);
+            if (IsEnabled(CompilerOptions.SsaForm))
+            {
+                Ssa.Deconstruct(graph);
+            }
+
+            // Linear scan is selected by the Lsra option; the hybrid allocator is used otherwise.
+            IRegisterAllocator allocator = IsEnabled(CompilerOptions.Lsra)
+                ? (IRegisterAllocator)new LinearScanAllocator()
+                : new HybridAllocator();
+
+            RegisterMasks masks = new RegisterMasks(
+                CallingConvention.GetIntAvailableRegisters(),
+                CallingConvention.GetVecAvailableRegisters(),
+                CallingConvention.GetIntCallerSavedRegisters(),
+                CallingConvention.GetVecCallerSavedRegisters(),
+                CallingConvention.GetIntCalleeSavedRegisters(),
+                CallingConvention.GetVecCalleeSavedRegisters());
+
+            AllocationResult allocation = allocator.RunPass(graph, stackAllocator, masks);
+            Logger.EndPass(PassName.RegisterAllocation, graph);
+
+            // Code generation pass: prologue first, then every operation of every block, in cfg.Blocks order.
+            Logger.StartPass(PassName.CodeGeneration);
+
+            using (MemoryStream output = new MemoryStream())
+            {
+                CodeGenContext context = new CodeGenContext(output, allocation, maxCallArgs, graph.Blocks.Count);
+
+                UnwindInfo unwindInfo = WritePrologue(context);
+
+                foreach (BasicBlock block in graph.Blocks)
+                {
+                    context.EnterBlock(block);
+
+                    foreach (Node node in block.Operations)
+                    {
+                        // Nodes that are not operations produce no code here.
+                        if (node is Operation operation)
+                        {
+                            GenerateOperation(context, operation);
+                        }
+                    }
+                }
+
+                Logger.EndPass(PassName.CodeGeneration);
+
+                // The code is fetched from the context before the stream is disposed.
+                return new CompiledFunction(context.GetCode(), unwindInfo);
+            }
+        }
+
+        // Emits the code for one IR operation. Extended (intrinsic) operations are handled
+        // here according to their IntrinsicType; every other instruction is dispatched
+        // through _instTable. Throws ArgumentException when nothing can handle the operation.
+        private static void GenerateOperation(CodeGenContext context, Operation operation)
+        {
+            if (operation.Instruction == Instruction.Extended)
+            {
+                IntrinsicOperation intrinOp = (IntrinsicOperation)operation;
+                IntrinsicInfo      info     = IntrinsicTable.GetInfo(intrinOp.Intrinsic);
+
+                switch (info.Type)
+                {
+                    case IntrinsicType.Comis_:
+                    {
+                        Operand dest = operation.Destination;
+                        Operand src1 = operation.GetSource(0);
+                        Operand src2 = operation.GetSource(1);
+
+                        switch (intrinOp.Intrinsic)
+                        {
+                            case Intrinsic.X86Comisdeq:
+                                context.Assembler.Comisd(src1, src2);
+                                context.Assembler.Setcc(dest, X86Condition.Equal);
+                                break;
+                            case Intrinsic.X86Comisdge:
+                                context.Assembler.Comisd(src1, src2);
+                                context.Assembler.Setcc(dest, X86Condition.AboveOrEqual);
+                                break;
+                            case Intrinsic.X86Comisdlt:
+                                context.Assembler.Comisd(src1, src2);
+                                context.Assembler.Setcc(dest, X86Condition.Below);
+                                break;
+                            case Intrinsic.X86Comisseq:
+                                context.Assembler.Comiss(src1, src2);
+                                context.Assembler.Setcc(dest, X86Condition.Equal);
+                                break;
+                            case Intrinsic.X86Comissge:
+                                context.Assembler.Comiss(src1, src2);
+                                context.Assembler.Setcc(dest, X86Condition.AboveOrEqual);
+                                break;
+                            case Intrinsic.X86Comisslt:
+                                context.Assembler.Comiss(src1, src2);
+                                context.Assembler.Setcc(dest, X86Condition.Below);
+                                break;
+                            default: // No compare/setcc pair was emitted, so the Movzx8 below would widen a stale value.
+                                throw new ArgumentException($"Invalid intrinsic \"{intrinOp.Intrinsic}\".");
+                        }
+                        // Setcc produced a byte-sized result; widen it to the full I32 destination.
+                        context.Assembler.Movzx8(dest, dest, OperandType.I32);
+
+                        break;
+                    }
+
+                    case IntrinsicType.PopCount:
+                    {
+                        Operand dest   = operation.Destination;
+                        Operand source = operation.GetSource(0);
+
+                        EnsureSameType(dest, source);
+
+                        Debug.Assert(dest.Type.IsInteger());
+
+                        context.Assembler.Popcnt(dest, source, dest.Type);
+
+                        break;
+                    }
+
+                    case IntrinsicType.Unary:
+                    {
+                        Operand dest   = operation.Destination;
+                        Operand source = operation.GetSource(0);
+
+                        EnsureSameType(dest, source);
+
+                        Debug.Assert(!dest.Type.IsInteger());
+
+                        context.Assembler.WriteInstruction(info.Inst, dest, source);
+
+                        break;
+                    }
+
+                    case IntrinsicType.UnaryToGpr:
+                    {
+                        Operand dest   = operation.Destination;
+                        Operand source = operation.GetSource(0);
+
+                        Debug.Assert(dest.Type.IsInteger() && !source.Type.IsInteger());
+
+                        context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type);
+
+                        break;
+                    }
+
+                    case IntrinsicType.Binary:
+                    {
+                        Operand dest = operation.Destination;
+                        Operand src1 = operation.GetSource(0);
+                        Operand src2 = operation.GetSource(1);
+
+                        EnsureSameType(dest, src1);
+
+                        if (!HardwareCapabilities.SupportsVexEncoding)
+                        {
+                            EnsureSameReg(dest, src1);
+                        }
+
+                        Debug.Assert(!dest.Type.IsInteger());
+                        Debug.Assert(!src2.Type.IsInteger() || src2.Kind == OperandKind.Constant);
+
+                        context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
+
+                        break;
+                    }
+
+                    case IntrinsicType.BinaryImm:
+                    {
+                        Operand dest = operation.Destination;
+                        Operand src1 = operation.GetSource(0);
+                        Operand src2 = operation.GetSource(1);
+
+                        EnsureSameType(dest, src1);
+
+                        if (!HardwareCapabilities.SupportsVexEncoding)
+                        {
+                            EnsureSameReg(dest, src1);
+                        }
+
+                        Debug.Assert(!dest.Type.IsInteger() && src2.Kind == OperandKind.Constant);
+
+                        context.Assembler.WriteInstruction(info.Inst, dest, src1, src2.AsByte());
+
+                        break;
+                    }
+
+                    case IntrinsicType.Ternary:
+                    {
+                        Operand dest = operation.Destination;
+                        Operand src1 = operation.GetSource(0);
+                        Operand src2 = operation.GetSource(1);
+                        Operand src3 = operation.GetSource(2);
+
+                        EnsureSameType(dest, src1, src2, src3);
+
+                        Debug.Assert(!dest.Type.IsInteger());
+                        // With VEX available, Pblendvb is emitted as Vpblendvb, which takes src3 explicitly.
+                        if (info.Inst == X86Instruction.Pblendvb && HardwareCapabilities.SupportsVexEncoding)
+                        {
+                            context.Assembler.WriteInstruction(X86Instruction.Vpblendvb, dest, src1, src2, src3);
+                        }
+                        else
+                        {
+                            EnsureSameReg(dest, src1);
+                            // src3 is not passed to the assembler here; it is only asserted to be register index 0.
+                            Debug.Assert(src3.GetRegister().Index == 0);
+
+                            context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
+                        }
+
+                        break;
+                    }
+
+                    case IntrinsicType.TernaryImm:
+                    {
+                        Operand dest = operation.Destination;
+                        Operand src1 = operation.GetSource(0);
+                        Operand src2 = operation.GetSource(1);
+                        Operand src3 = operation.GetSource(2);
+
+                        EnsureSameType(dest, src1, src2);
+
+                        if (!HardwareCapabilities.SupportsVexEncoding)
+                        {
+                            EnsureSameReg(dest, src1);
+                        }
+
+                        Debug.Assert(!dest.Type.IsInteger() && src3.Kind == OperandKind.Constant);
+
+                        context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3.AsByte());
+
+                        break;
+                    }
+
+                    default: // Fail loudly instead of silently emitting no code for an unhandled intrinsic type.
+                        throw new ArgumentException($"Invalid intrinsic type \"{info.Type}\".");
+                }
+            }
+            else
+            {
+                Action<CodeGenContext, Operation> func = _instTable[(int)operation.Instruction];
+
+                if (func == null)
+                {
+                    throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\".");
+                }
+
+                func(context, operation);
+            }
+        }
+
+        // Add: integer adds pass only dest and src2 to the assembler; FP32 uses Addss, any other type Addsd.
+        private static void GenerateAdd(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand lhs    = operation.GetSource(0);
+            Operand rhs    = operation.GetSource(1);
+
+            ValidateBinOp(result, lhs, rhs);
+
+            if (result.Type.IsInteger())
+            {
+                context.Assembler.Add(result, rhs, result.Type);
+                return;
+            }
+            if (result.Type == OperandType.FP32)
+            {
+                context.Assembler.Addss(result, lhs, rhs);
+                return;
+            }
+            context.Assembler.Addsd(result, lhs, rhs); // Remaining case, expected to be FP64.
+        }
+
+        // BitwiseAnd: integer only (asserted); only dest and src2 are passed to the assembler.
+        private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand lhs    = operation.GetSource(0);
+            Operand rhs    = operation.GetSource(1);
+
+            ValidateBinOp(result, lhs, rhs);
+            Debug.Assert(result.Type.IsInteger());
+
+            context.Assembler.And(result, rhs, result.Type);
+        }
+
+        // BitwiseExclusiveOr: non-integer operands use Xorps; integers use Xor with dest and src2 only.
+        private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand lhs    = operation.GetSource(0);
+            Operand rhs    = operation.GetSource(1);
+
+            ValidateBinOp(result, lhs, rhs);
+
+            if (!result.Type.IsInteger())
+            {
+                context.Assembler.Xorps(result, lhs, rhs);
+                return;
+            }
+
+            context.Assembler.Xor(result, rhs, result.Type);
+        }
+
+        // BitwiseNot: integer only (asserted). The source is validated but never passed to the assembler.
+        private static void GenerateBitwiseNot(CodeGenContext context, Operation operation)
+        {
+            Operand target = operation.Destination;
+
+            ValidateUnOp(target, operation.GetSource(0));
+            Debug.Assert(target.Type.IsInteger());
+
+            // Operates in place on the destination operand.
+            context.Assembler.Not(target);
+        }
+
+        // BitwiseOr: integer only (asserted); only dest and src2 are passed to the assembler.
+        private static void GenerateBitwiseOr(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand lhs    = operation.GetSource(0);
+            Operand rhs    = operation.GetSource(1);
+
+            ValidateBinOp(result, lhs, rhs);
+            Debug.Assert(result.Type.IsInteger());
+
+            context.Assembler.Or(result, rhs, result.Type);
+        }
+
+        // Branch: unconditional jump to the Branch target of context.CurrBlock.
+        // Nothing is read from the operation itself.
+        private static void GenerateBranch(CodeGenContext context, Operation operation)
+            => context.JumpTo(context.CurrBlock.Branch);
+
+        // BranchIfFalse: tests the source against itself and jumps on Equal, i.e. when the source is zero.
+        private static void GenerateBranchIfFalse(CodeGenContext context, Operation operation)
+        {
+            Operand condition = operation.GetSource(0);
+
+            context.Assembler.Test(condition, condition, condition.Type);
+            context.JumpTo(X86Condition.Equal, context.CurrBlock.Branch);
+        }
+
+        // BranchIfTrue: tests the source against itself and jumps on NotEqual, i.e. when the source is non-zero.
+        private static void GenerateBranchIfTrue(CodeGenContext context, Operation operation)
+        {
+            Operand condition = operation.GetSource(0);
+
+            context.Assembler.Test(condition, condition, condition.Type);
+            context.JumpTo(X86Condition.NotEqual, context.CurrBlock.Branch);
+        }
+
+        // ByteSwap: integer only (asserted). The source is validated but never passed to the assembler.
+        private static void GenerateByteSwap(CodeGenContext context, Operation operation)
+        {
+            Operand target = operation.Destination;
+
+            ValidateUnOp(target, operation.GetSource(0));
+            Debug.Assert(target.Type.IsInteger());
+
+            // Operates in place on the destination operand.
+            context.Assembler.Bswap(target);
+        }
+
+        // Call: emits a call through the operation's first source operand. Arguments and
+        // the return value are not touched here - presumably set up by an earlier pass.
+        private static void GenerateCall(CodeGenContext context, Operation operation)
+            => context.Assembler.Call(operation.GetSource(0));
+
+        // Clobber only tells the register allocator that a register gets clobbered;
+        // it has no machine code of its own, so this handler is intentionally empty.
+        private static void GenerateClobber(CodeGenContext context, Operation operation)
+        {
+        }
+
+        // CompareAndSwap128: wraps the first source in an I64-typed memory operand and emits Cmpxchg16b
+        // on it. No other operand is read here - presumably they are pinned to fixed registers earlier.
+        private static void GenerateCompareAndSwap128(CodeGenContext context, Operation operation)
+        {
+            MemoryOperand target = new MemoryOperand(OperandType.I64, operation.GetSource(0));
+
+            context.Assembler.Cmpxchg16b(target);
+        }
+
+        // CompareEqual: materializes "src1 == src2" into the destination through the
+        // shared GenerateCompare path, using the Equal condition.
+        private static void GenerateCompareEqual(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.Equal);
+
+        // CompareGreater: shared GenerateCompare path with the Greater condition
+        // (the signed one on x86; the UI variant of this instruction uses Above).
+        private static void GenerateCompareGreater(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.Greater);
+
+        // CompareGreaterOrEqual: shared GenerateCompare path with the GreaterOrEqual condition
+        // (the signed one on x86; the UI variant of this instruction uses AboveOrEqual).
+        private static void GenerateCompareGreaterOrEqual(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.GreaterOrEqual);
+
+        // CompareGreaterOrEqualUI: shared GenerateCompare path with the AboveOrEqual
+        // condition (the unsigned counterpart of GreaterOrEqual on x86).
+        private static void GenerateCompareGreaterOrEqualUI(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.AboveOrEqual);
+
+        // CompareGreaterUI: shared GenerateCompare path with the Above condition
+        // (the unsigned counterpart of Greater on x86).
+        private static void GenerateCompareGreaterUI(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.Above);
+
+        // CompareLess: shared GenerateCompare path with the Less condition
+        // (the signed one on x86; the UI variant of this instruction uses Below).
+        private static void GenerateCompareLess(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.Less);
+
+        // CompareLessOrEqual: shared GenerateCompare path with the LessOrEqual condition
+        // (the signed one on x86; the UI variant of this instruction uses BelowOrEqual).
+        private static void GenerateCompareLessOrEqual(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.LessOrEqual);
+
+        // CompareLessOrEqualUI: shared GenerateCompare path with the BelowOrEqual
+        // condition (the unsigned counterpart of LessOrEqual on x86).
+        private static void GenerateCompareLessOrEqualUI(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.BelowOrEqual);
+
+        // CompareLessUI: shared GenerateCompare path with the Below condition
+        // (the unsigned counterpart of Less on x86).
+        private static void GenerateCompareLessUI(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.Below);
+
+        // CompareNotEqual: materializes "src1 != src2" into the destination through the
+        // shared GenerateCompare path, using the NotEqual condition.
+        private static void GenerateCompareNotEqual(CodeGenContext context, Operation operation)
+            => GenerateCompare(context, operation, X86Condition.NotEqual);
+
+        // Shared compare path: Cmp on the two sources, Setcc on the I32 destination, then widen the result.
+        private static void GenerateCompare(CodeGenContext context, Operation operation, X86Condition condition)
+        {
+            Operand result = operation.Destination;
+            Operand lhs    = operation.GetSource(0);
+            Operand rhs    = operation.GetSource(1);
+
+            EnsureSameType(lhs, rhs);
+            Debug.Assert(result.Type == OperandType.I32);
+
+            context.Assembler.Cmp(lhs, rhs, lhs.Type);
+            context.Assembler.Setcc(result, condition);
+            context.Assembler.Movzx8(result, result, OperandType.I32); // Zero extend the byte written by Setcc.
+        }
+
+        // ConditionalSelect: dest must share src3's register; src2 is conditionally moved over it when src1 tests non-zero.
+        private static void GenerateConditionalSelect(CodeGenContext context, Operation operation)
+        {
+            Operand result    = operation.Destination;
+            Operand condition = operation.GetSource(0);
+            Operand whenTrue  = operation.GetSource(1);
+            Operand whenFalse = operation.GetSource(2);
+
+            EnsureSameReg(result, whenFalse);
+            EnsureSameType(result, whenTrue, whenFalse);
+            Debug.Assert(result.Type.IsInteger());
+            Debug.Assert(condition.Type == OperandType.I32);
+
+            context.Assembler.Test(condition, condition, condition.Type);
+            context.Assembler.Cmovcc(result, whenTrue, result.Type, X86Condition.NotEqual);
+        }
+
+        // ConvertI64ToI32: a plain I32-typed Mov from the I64 source into the I32 destination.
+        private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation)
+        {
+            Operand narrow = operation.Destination;
+            Operand wide   = operation.GetSource(0);
+
+            Debug.Assert(narrow.Type == OperandType.I32 && wide.Type == OperandType.I64);
+            context.Assembler.Mov(narrow, wide, OperandType.I32);
+        }
+
+        // ConvertToFP: converts an integer, or the other FP width, to the destination's FP32/FP64 type.
+        // Integer sources use Cvtsi2ss/Cvtsi2sd; FP sources use Cvtsd2ss/Cvtss2sd plus an upper-lane clear.
+        private static void GenerateConvertToFP(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand input  = operation.GetSource(0);
+
+            bool toSingle = result.Type == OperandType.FP32;
+
+            Debug.Assert(toSingle || result.Type == OperandType.FP64);
+
+            if (input.Type.IsInteger())
+            {
+                // The destination is zeroed first; it is also passed as the first source of the convert.
+                context.Assembler.Xorps(result, result, result);
+
+                if (toSingle)
+                {
+                    context.Assembler.Cvtsi2ss(result, result, input, input.Type);
+                }
+                else
+                {
+                    context.Assembler.Cvtsi2sd(result, result, input, input.Type);
+                }
+            }
+            else if (toSingle)
+            {
+                Debug.Assert(input.Type == OperandType.FP64);
+
+                context.Assembler.Cvtsd2ss(result, result, input);
+                GenerateZeroUpper96(context, result, result);
+            }
+            else
+            {
+                Debug.Assert(input.Type == OperandType.FP32);
+
+                context.Assembler.Cvtss2sd(result, result, input);
+                GenerateZeroUpper64(context, result, result);
+            }
+        }
+
+        // Copy: skips copies onto the same operand, zeroes registers with XOR, otherwise Mov (integers) or Movdqu.
+        private static void GenerateCopy(CodeGenContext context, Operation operation)
+        {
+            Operand target = operation.Destination;
+            Operand value  = operation.GetSource(0);
+
+            EnsureSameType(target, value);
+            Debug.Assert(target.Type.IsInteger() || value.Kind != OperandKind.Constant);
+
+            if (target.Kind == value.Kind && target.Value == value.Value)
+            {
+                return;
+            }
+
+            bool zeroIntoRegister = target.Kind == OperandKind.Register &&
+                                    value.Kind  == OperandKind.Constant && value.Value == 0;
+            if (zeroIntoRegister)
+            {
+                // "mov reg, 0" is assembled as "xor reg, reg", as the latter is more efficient.
+                context.Assembler.Xor(target, target, OperandType.I32);
+            }
+            else if (!target.Type.IsInteger())
+            {
+                context.Assembler.Movdqu(target, value);
+            }
+            else
+            {
+                context.Assembler.Mov(target, value, target.Type);
+            }
+        }
+
+        // CountLeadingZeros: built on BSR, plus a fix-up for a zero input, for which
+        // the expected result is the operand size (32 or 64).
+        private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand value  = operation.GetSource(0);
+
+            EnsureSameType(result, value);
+            Debug.Assert(result.Type.IsInteger());
+
+            int bitCount  = result.Type == OperandType.I32 ? 32 : 64;
+            int indexMask = bitCount - 1;
+
+            // BSR returns the zero based index of the last bit set on the operand,
+            // starting from the least significant bit. For a zero input the result is
+            // undefined, however the ZF flag is set, so the near jump below (taken on
+            // NotEqual) skips the zero-input fix-up only when the input was non-zero.
+            context.Assembler.Bsr(result, value, result.Type);
+            context.JumpToNear(X86Condition.NotEqual);
+
+            // Zero input: load (bitCount | indexMask) rather than bitCount itself, because
+            // the XOR with indexMask below then leaves exactly bitCount in the destination.
+            context.Assembler.Mov(result, new Operand(bitCount | indexMask), OperandType.I32);
+
+            context.JumpHere();
+
+            // Non-zero input: XOR with indexMask maps the bit index to (indexMask - index),
+            // which is the number of 0 bits on the high end.
+            context.Assembler.Xor(result, new Operand(indexMask), OperandType.I32);
+        }
+
+        // CpuId: emits CPUID. No operand of the operation is read here, so its inputs and
+        // outputs presumably live in fixed registers arranged by an earlier pass.
+        private static void GenerateCpuId(CodeGenContext context, Operation operation)
+            => context.Assembler.Cpuid();
+
+        // Divide: integers use Cdq/Cqo + Idiv on the third source; FP32 uses Divss, any other type Divsd.
+        private static void GenerateDivide(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+
+            if (result.Type.IsInteger())
+            {
+                // Integer form: the divisor is source 2; sources 0 and 1 are not passed to the assembler.
+                Operand intDivisor = operation.GetSource(2);
+                EnsureSameType(result, intDivisor);
+
+                // Cdq is emitted for I32 operands and Cqo otherwise, right before the Idiv.
+                if (intDivisor.Type == OperandType.I32)
+                {
+                    context.Assembler.Cdq();
+                }
+                else
+                {
+                    context.Assembler.Cqo();
+                }
+                context.Assembler.Idiv(intDivisor);
+                return;
+            }
+
+            Operand dividend = operation.GetSource(0);
+            Operand divisor  = operation.GetSource(1);
+            ValidateBinOp(result, dividend, divisor);
+
+            if (result.Type == OperandType.FP32)
+            {
+                context.Assembler.Divss(result, dividend, divisor);
+            }
+            else
+            {
+                context.Assembler.Divsd(result, dividend, divisor);
+            }
+        }
+
+        // DivideUI: clears RDX with a 32-bit XOR, then emits Div on the third source (asserted integer).
+        private static void GenerateDivideUI(CodeGenContext context, Operation operation)
+        {
+            Operand divisor  = operation.GetSource(2);
+            Operand highHalf = Register(X86Register.Rdx);
+
+            Debug.Assert(divisor.Type.IsInteger());
+
+            context.Assembler.Xor(highHalf, highHalf, OperandType.I32);
+            context.Assembler.Div(divisor);
+        }
+
+        // Fill: loads the destination from an RSP-relative memory operand. The displacement is the
+        // constant first source plus the context's CallArgsRegionSize.
+        private static void GenerateFill(CodeGenContext context, Operation operation)
+        {
+            Operand target = operation.Destination;
+            Operand slot   = operation.GetSource(0);
+
+            Debug.Assert(slot.Kind == OperandKind.Constant);
+
+            int     displacement = slot.AsInt32() + context.CallArgsRegionSize;
+            Operand stackPointer = Register(X86Register.Rsp);
+
+            MemoryOperand stackSlot = new MemoryOperand(target.Type, stackPointer, null, Multiplier.x1, displacement);
+            GenerateLoad(context, stackSlot, target);
+        }
+
+        // Load: wraps the first source in a memory operand of the destination's type and defers to the shared load path.
+        private static void GenerateLoad(CodeGenContext context, Operation operation)
+        {
+            Operand target   = operation.Destination;
+            Operand location = Memory(operation.GetSource(0), target.Type);
+            GenerateLoad(context, location, target);
+        }
+
+        // Load16: 16-bit load into an integer destination (asserted), emitted as Movzx16.
+        private static void GenerateLoad16(CodeGenContext context, Operation operation)
+        {
+            Operand target   = operation.Destination;
+            Operand location = Memory(operation.GetSource(0), target.Type);
+
+            Debug.Assert(target.Type.IsInteger());
+            context.Assembler.Movzx16(target, location, target.Type);
+        }
+
+        // Load8: 8-bit load into an integer destination (asserted), emitted as Movzx8.
+        private static void GenerateLoad8(CodeGenContext context, Operation operation)
+        {
+            Operand target   = operation.Destination;
+            Operand location = Memory(operation.GetSource(0), target.Type);
+
+            Debug.Assert(target.Type.IsInteger());
+            context.Assembler.Movzx8(target, location, target.Type);
+        }
+
+        // Emits a multiplication of source 0 by source 1.
+        // Integers use IMUL: the three-operand form when source 1 is a
+        // constant, otherwise the two-operand form (the destination is then
+        // expected to share its register with source 0). FP32 uses MULSS and
+        // any other type falls through to MULSD.
+        private static void GenerateMultiply(CodeGenContext context, Operation operation)
+        {
+            Operand product = operation.Destination;
+            Operand lhs     = operation.GetSource(0);
+            Operand rhs     = operation.GetSource(1);
+
+            bool rhsIsConstant = rhs.Kind == OperandKind.Constant;
+
+            if (!rhsIsConstant)
+            {
+                EnsureSameReg(product, lhs);
+            }
+
+            EnsureSameType(product, lhs, rhs);
+
+            if (!product.Type.IsInteger())
+            {
+                if (product.Type == OperandType.FP32)
+                {
+                    context.Assembler.Mulss(product, lhs, rhs);
+                }
+                else /* if (product.Type == OperandType.FP64) */
+                {
+                    context.Assembler.Mulsd(product, lhs, rhs);
+                }
+
+                return;
+            }
+
+            if (rhsIsConstant)
+            {
+                context.Assembler.Imul(product, lhs, rhs, product.Type);
+            }
+            else
+            {
+                context.Assembler.Imul(product, rhs, product.Type);
+            }
+        }
+
+        // Emits the one-operand signed IMUL with source 1 as its explicit
+        // operand; that operand must be a 64-bit integer.
+        private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation)
+        {
+            Operand multiplier = operation.GetSource(1);
+
+            Debug.Assert(multiplier.Type == OperandType.I64);
+
+            context.Assembler.Imul(multiplier);
+        }
+
+        // Emits the one-operand unsigned MUL with source 1 as its explicit
+        // operand; that operand must be a 64-bit integer.
+        private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation)
+        {
+            Operand multiplier = operation.GetSource(1);
+
+            Debug.Assert(multiplier.Type == OperandType.I64);
+
+            context.Assembler.Mul(multiplier);
+        }
+
+        // Emits an in-place integer negation (NEG) of the destination.
+        // ValidateUnOp checks, in debug builds, that the destination and
+        // source 0 agree on register and type.
+        private static void GenerateNegate(CodeGenContext context, Operation operation)
+        {
+            Operand target = operation.Destination;
+
+            ValidateUnOp(target, operation.GetSource(0));
+
+            Debug.Assert(target.Type.IsInteger());
+
+            context.Assembler.Neg(target);
+        }
+
+        // Emits the function epilogue followed by the return instruction.
+        // The operation itself carries no information that is used here.
+        private static void GenerateReturn(CodeGenContext context, Operation operation)
+        {
+            // The frame must be torn down before control leaves the function.
+            WriteEpilogue(context);
+
+            var assembler = context.Assembler;
+
+            assembler.Return();
+        }
+
+        // Emits ROR: rotates the destination right, in place, by the count in
+        // source 1. ValidateShift checks (debug builds) that the destination
+        // matches source 0 and that the count is a 32-bit integer.
+        private static void GenerateRotateRight(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand value  = operation.GetSource(0);
+            Operand count  = operation.GetSource(1);
+
+            ValidateShift(result, value, count);
+
+            context.Assembler.Ror(result, count, result.Type);
+        }
+
+        // Emits SHL: shifts the destination left, in place, by the count in
+        // source 1. ValidateShift checks (debug builds) that the destination
+        // matches source 0 and that the count is a 32-bit integer.
+        private static void GenerateShiftLeft(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand value  = operation.GetSource(0);
+            Operand count  = operation.GetSource(1);
+
+            ValidateShift(result, value, count);
+
+            context.Assembler.Shl(result, count, result.Type);
+        }
+
+        // Emits SAR: arithmetic (sign-preserving) right shift of the
+        // destination, in place, by the count in source 1.
+        private static void GenerateShiftRightSI(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand value  = operation.GetSource(0);
+            Operand count  = operation.GetSource(1);
+
+            ValidateShift(result, value, count);
+
+            context.Assembler.Sar(result, count, result.Type);
+        }
+
+        // Emits SHR: logical (zero-filling) right shift of the destination,
+        // in place, by the count in source 1.
+        private static void GenerateShiftRightUI(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand value  = operation.GetSource(0);
+            Operand count  = operation.GetSource(1);
+
+            ValidateShift(result, value, count);
+
+            context.Assembler.Shr(result, count, result.Type);
+        }
+
+        // Sign-extends the low 16 bits of source 0 into the destination,
+        // using the destination's operand type for the instruction.
+        private static void GenerateSignExtend16(CodeGenContext context, Operation operation)
+        {
+            Operand extended = operation.Destination;
+            Operand narrow   = operation.GetSource(0);
+
+            Debug.Assert(extended.Type.IsInteger() && narrow.Type.IsInteger());
+
+            context.Assembler.Movsx16(extended, narrow, extended.Type);
+        }
+
+        // Sign-extends the low 32 bits of source 0 into the destination,
+        // using the destination's operand type for the instruction.
+        private static void GenerateSignExtend32(CodeGenContext context, Operation operation)
+        {
+            Operand extended = operation.Destination;
+            Operand narrow   = operation.GetSource(0);
+
+            Debug.Assert(extended.Type.IsInteger() && narrow.Type.IsInteger());
+
+            context.Assembler.Movsx32(extended, narrow, extended.Type);
+        }
+
+        // Sign-extends the low 8 bits of source 0 into the destination,
+        // using the destination's operand type for the instruction.
+        private static void GenerateSignExtend8(CodeGenContext context, Operation operation)
+        {
+            Operand extended = operation.Destination;
+            Operand narrow   = operation.GetSource(0);
+
+            Debug.Assert(extended.Type.IsInteger() && narrow.Type.IsInteger());
+
+            context.Assembler.Movsx8(extended, narrow, extended.Type);
+        }
+
+        // Stores a value to the stack, with the offset biased by
+        // CallArgsRegionSize (the same bias GenerateFill applies).
+        private static void GenerateSpill(CodeGenContext context, Operation operation)
+            => GenerateSpill(context, operation, context.CallArgsRegionSize);
+
+        // Stores a value to the stack at an unbiased offset, i.e. directly
+        // relative to RSP.
+        private static void GenerateSpillArg(CodeGenContext context, Operation operation)
+            => GenerateSpill(context, operation, 0);
+
+        // Stores source 1 to the stack slot at [RSP + source 0 + baseOffset].
+        // Source 0 must be a constant; the store instruction is picked from
+        // the type of the stored value.
+        private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset)
+        {
+            Operand slotOffset = operation.GetSource(0);
+            Operand spilled    = operation.GetSource(1);
+
+            Debug.Assert(slotOffset.Kind == OperandKind.Constant);
+
+            int displacement = slotOffset.AsInt32() + baseOffset;
+
+            MemoryOperand stackSlot = new MemoryOperand(
+                spilled.Type,
+                Register(X86Register.Rsp),
+                null,
+                Multiplier.x1,
+                displacement);
+
+            GenerateStore(context, stackSlot, spilled);
+        }
+
+        // Materializes the address of a stack slot into the destination with
+        // LEA. The slot is at [RSP + source 0 + CallArgsRegionSize]; source 0
+        // must be a constant.
+        private static void GenerateStackAlloc(CodeGenContext context, Operation operation)
+        {
+            Operand pointer    = operation.Destination;
+            Operand slotOffset = operation.GetSource(0);
+
+            Debug.Assert(slotOffset.Kind == OperandKind.Constant);
+
+            int displacement = slotOffset.AsInt32() + context.CallArgsRegionSize;
+
+            MemoryOperand stackSlot = new MemoryOperand(
+                OperandType.I64,
+                Register(X86Register.Rsp),
+                null,
+                Multiplier.x1,
+                displacement);
+
+            context.Assembler.Lea(pointer, stackSlot, OperandType.I64);
+        }
+
+        // Stores source 1 to the address given by source 0, using the move
+        // that matches the type of the stored value.
+        private static void GenerateStore(CodeGenContext context, Operation operation)
+        {
+            Operand stored = operation.GetSource(1);
+
+            Operand pointer = Memory(operation.GetSource(0), stored.Type);
+
+            GenerateStore(context, pointer, stored);
+        }
+
+        // Stores the low 16 bits of source 1 (an integer) to the address
+        // given by source 0.
+        private static void GenerateStore16(CodeGenContext context, Operation operation)
+        {
+            Operand stored  = operation.GetSource(1);
+            Operand pointer = Memory(operation.GetSource(0), stored.Type);
+
+            Debug.Assert(stored.Type.IsInteger());
+
+            var assembler = context.Assembler;
+
+            assembler.Mov16(pointer, stored);
+        }
+
+        // Stores the low 8 bits of source 1 (an integer) to the address
+        // given by source 0.
+        private static void GenerateStore8(CodeGenContext context, Operation operation)
+        {
+            Operand stored  = operation.GetSource(1);
+            Operand pointer = Memory(operation.GetSource(0), stored.Type);
+
+            Debug.Assert(stored.Type.IsInteger());
+
+            var assembler = context.Assembler;
+
+            assembler.Mov8(pointer, stored);
+        }
+
+        // Emits source 0 - source 1 into the destination.
+        // Integers use the two-operand SUB (destination updated in place),
+        // FP32 uses SUBSS and any other type falls through to SUBSD.
+        private static void GenerateSubtract(CodeGenContext context, Operation operation)
+        {
+            Operand difference = operation.Destination;
+            Operand minuend    = operation.GetSource(0);
+            Operand subtrahend = operation.GetSource(1);
+
+            ValidateBinOp(difference, minuend, subtrahend);
+
+            if (difference.Type.IsInteger())
+            {
+                context.Assembler.Sub(difference, subtrahend, difference.Type);
+
+                return;
+            }
+
+            if (difference.Type == OperandType.FP32)
+            {
+                context.Assembler.Subss(difference, minuend, subtrahend);
+
+                return;
+            }
+
+            // Remaining case: OperandType.FP64.
+            context.Assembler.Subsd(difference, minuend, subtrahend);
+        }
+
+        // Moves an integer source into a non-integer (vector/FP) destination:
+        // MOVD for 32-bit sources, MOVQ for everything else (64-bit).
+        private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation)
+        {
+            Operand vector = operation.Destination;
+            Operand scalar = operation.GetSource(0);
+
+            Debug.Assert(!vector.Type.IsInteger() && scalar.Type.IsInteger());
+
+            bool is32Bit = scalar.Type == OperandType.I32;
+
+            if (!is32Bit)
+            {
+                // Remaining case: OperandType.I64.
+                context.Assembler.Movq(vector, scalar);
+            }
+            else
+            {
+                context.Assembler.Movd(vector, scalar);
+            }
+        }
+
+        // Extracts one 32-bit or 64-bit element of a V128 (source 0) into the
+        // destination. The element index is the constant in source 1 and the
+        // element kind is taken from the destination type.
+        //
+        // Integer destinations use PEXTRD/PEXTRQ when SSE4.1 is available.
+        // Otherwise the wanted lane is shuffled to position 0, read with
+        // MOVD/MOVQ, and the source vector is shuffled back to its original
+        // layout. Floating-point destinations are built directly in the
+        // destination register and leave the source untouched.
+        private static void GenerateVectorExtract(CodeGenContext context, Operation operation)
+        {
+            Operand value  = operation.Destination;
+            Operand vector = operation.GetSource(0);
+            Operand lane   = operation.GetSource(1);
+
+            Debug.Assert(vector.Type == OperandType.V128);
+            Debug.Assert(lane.Kind == OperandKind.Constant);
+
+            byte index = lane.AsByte();
+
+            switch (value.Type)
+            {
+                case OperandType.I32:
+                {
+                    Debug.Assert(index < 4);
+
+                    if (HardwareCapabilities.SupportsSse41)
+                    {
+                        context.Assembler.Pextrd(value, vector, index);
+                    }
+                    else if (index == 0)
+                    {
+                        context.Assembler.Movd(value, vector);
+                    }
+                    else
+                    {
+                        // Identity shuffle rotated by the lane index, and its inverse.
+                        const int identity = 0b11_10_01_00;
+
+                        int rotateIn  = BitUtils.RotateRight(identity,     index * 2, 8);
+                        int rotateOut = BitUtils.RotateRight(identity, 8 - index * 2, 8);
+
+                        context.Assembler.Pshufd(vector, vector, (byte)rotateIn);
+                        context.Assembler.Movd  (value,  vector);
+                        context.Assembler.Pshufd(vector, vector, (byte)rotateOut);
+                    }
+
+                    break;
+                }
+
+                case OperandType.I64:
+                {
+                    Debug.Assert(index < 2);
+
+                    if (HardwareCapabilities.SupportsSse41)
+                    {
+                        context.Assembler.Pextrq(value, vector, index);
+                    }
+                    else if (index == 0)
+                    {
+                        context.Assembler.Movq(value, vector);
+                    }
+                    else
+                    {
+                        // Swaps the two 64-bit halves; applying it twice restores the vector.
+                        const byte swapHalves = 0b01_00_11_10;
+
+                        context.Assembler.Pshufd(vector, vector, swapHalves);
+                        context.Assembler.Movq  (value,  vector);
+                        context.Assembler.Pshufd(vector, vector, swapHalves);
+                    }
+
+                    break;
+                }
+
+                default:
+                {
+                    // Floating-point types.
+                    bool isSingle = value.Type == OperandType.FP32;
+                    bool isDouble = value.Type == OperandType.FP64;
+
+                    Debug.Assert(index < (isSingle ? 4 : 2));
+
+                    bool inUpperHalf = (index >= 2 && isSingle) || (index == 1 && isDouble);
+
+                    if (inUpperHalf)
+                    {
+                        // Bring the upper 64 bits down, then clear the upper half.
+                        context.Assembler.Movhlps(value, value, vector);
+                        context.Assembler.Movq   (value, value);
+                    }
+                    else
+                    {
+                        context.Assembler.Movq(value, vector);
+                    }
+
+                    if (isSingle)
+                    {
+                        // Lane 0 takes element (index & 1) of the low half; lanes 1-3
+                        // are copied from lane 3, which the MOVQ above left as zero.
+                        context.Assembler.Pshufd(value, value, (byte)(0xfc | (index & 1)));
+                    }
+
+                    break;
+                }
+            }
+        }
+
+        // Extracts one 16-bit element of a V128 (source 0) into the
+        // destination with PEXTRW. The element index (0-7) is the constant in
+        // source 1.
+        private static void GenerateVectorExtract16(CodeGenContext context, Operation operation)
+        {
+            Operand value  = operation.Destination;
+            Operand vector = operation.GetSource(0);
+            Operand lane   = operation.GetSource(1);
+
+            Debug.Assert(vector.Type == OperandType.V128);
+            Debug.Assert(lane.Kind == OperandKind.Constant);
+
+            byte wordIndex = lane.AsByte();
+
+            Debug.Assert(wordIndex < 8);
+
+            context.Assembler.Pextrw(value, vector, wordIndex);
+        }
+
+        // Extracts one 8-bit element of a V128 (source 0) into the
+        // destination. The element index (0-15) is the constant in source 1.
+        //
+        // With SSE4.1 this is a single PEXTRB. Without it, the 16-bit word
+        // containing the byte is extracted with PEXTRW, then either shifted
+        // right by 8 (odd index, high byte) or reduced to its low byte with
+        // MOVZX (even index).
+        private static void GenerateVectorExtract8(CodeGenContext context, Operation operation)
+        {
+            Operand value  = operation.Destination;
+            Operand vector = operation.GetSource(0);
+            Operand lane   = operation.GetSource(1);
+
+            Debug.Assert(vector.Type == OperandType.V128);
+            Debug.Assert(lane.Kind == OperandKind.Constant);
+
+            byte byteIndex = lane.AsByte();
+
+            Debug.Assert(byteIndex < 16);
+
+            if (HardwareCapabilities.SupportsSse41)
+            {
+                context.Assembler.Pextrb(value, vector, byteIndex);
+
+                return;
+            }
+
+            context.Assembler.Pextrw(value, vector, (byte)(byteIndex >> 1));
+
+            bool wantsHighByte = (byteIndex & 1) != 0;
+
+            if (wantsHighByte)
+            {
+                context.Assembler.Shr(value, new Operand(8), OperandType.I32);
+            }
+            else
+            {
+                context.Assembler.Movzx8(value, value, OperandType.I32);
+            }
+        }
+
+        // Inserts a 32-bit or 64-bit value (source 1) into one element of a
+        // V128 (source 0), writing the resulting vector to the destination.
+        // The element index is the constant in source 2 and the element kind
+        // is taken from the type of the inserted value.
+        //
+        // Without VEX encoding the destination must alias the source vector.
+        private static void GenerateVectorInsert(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand vector = operation.GetSource(0);
+            Operand value  = operation.GetSource(1);
+            Operand lane   = operation.GetSource(2);
+
+            if (!HardwareCapabilities.SupportsVexEncoding)
+            {
+                EnsureSameReg(result, vector);
+            }
+
+            Debug.Assert(vector.Type == OperandType.V128);
+            Debug.Assert(lane.Kind == OperandKind.Constant);
+
+            byte index = lane.AsByte();
+
+            // SSE2 fallback for integer elements: the value is inserted 16 bits
+            // at a time with PINSRW, rotating it right by 16 after each step.
+            // After wordsPerElement steps the value register has made a full
+            // rotation and holds its original contents again.
+            void InsertWordsSse2(int wordsPerElement)
+            {
+                if (result.GetRegister() != vector.GetRegister())
+                {
+                    context.Assembler.Movdqu(result, vector);
+                }
+
+                for (int step = 0; step < wordsPerElement; step++)
+                {
+                    byte wordIndex = (byte)(index * wordsPerElement + step);
+
+                    // Insert lower 16-bits.
+                    context.Assembler.Pinsrw(result, result, value, wordIndex);
+
+                    // Move next word down.
+                    context.Assembler.Ror(value, new Operand(16), value.Type);
+                }
+            }
+
+            switch (value.Type)
+            {
+                case OperandType.I32:
+                {
+                    Debug.Assert(index < 4);
+
+                    if (HardwareCapabilities.SupportsSse41)
+                    {
+                        context.Assembler.Pinsrd(result, vector, value, index);
+                    }
+                    else
+                    {
+                        InsertWordsSse2(2);
+                    }
+
+                    break;
+                }
+
+                case OperandType.I64:
+                {
+                    Debug.Assert(index < 2);
+
+                    if (HardwareCapabilities.SupportsSse41)
+                    {
+                        context.Assembler.Pinsrq(result, vector, value, index);
+                    }
+                    else
+                    {
+                        InsertWordsSse2(4);
+                    }
+
+                    break;
+                }
+
+                case OperandType.FP32:
+                {
+                    Debug.Assert(index < 4);
+
+                    if (index == 0)
+                    {
+                        context.Assembler.Movss(result, vector, value);
+                    }
+                    else if (HardwareCapabilities.SupportsSse41)
+                    {
+                        context.Assembler.Insertps(result, vector, value, (byte)(index << 4));
+                    }
+                    else if (vector.GetRegister() == value.GetRegister())
+                    {
+                        // The value is lane 0 of the vector itself: take the identity
+                        // shuffle and make the target lane select lane 0 instead.
+                        int selector = 0b11_10_01_00 & ~(0b11 << (index * 2));
+
+                        context.Assembler.Pshufd(result, vector, (byte)selector);
+                    }
+                    else
+                    {
+                        // Identity shuffle rotated by the lane index, and its inverse.
+                        const int identity = 0b11_10_01_00;
+
+                        int rotateIn  = BitUtils.RotateRight(identity,     index * 2, 8);
+                        int rotateOut = BitUtils.RotateRight(identity, 8 - index * 2, 8);
+
+                        context.Assembler.Pshufd(vector, vector, (byte)rotateIn);  // Lane to be inserted in position 0.
+                        context.Assembler.Movss (result, vector, value);           // result[127:0] = vector[127:32] | value[31:0]
+                        context.Assembler.Pshufd(result, result, (byte)rotateOut); // Inserted lane in original position.
+
+                        if (result.GetRegister() != vector.GetRegister())
+                        {
+                            context.Assembler.Pshufd(vector, vector, (byte)rotateOut); // Restore the source vector.
+                        }
+                    }
+
+                    break;
+                }
+
+                default: /* OperandType.FP64 */
+                {
+                    Debug.Assert(index < 2);
+
+                    if (index == 0)
+                    {
+                        context.Assembler.Movsd(result, vector, value);
+                    }
+                    else
+                    {
+                        context.Assembler.Movlhps(result, vector, value);
+                    }
+
+                    break;
+                }
+            }
+        }
+
+        // Inserts the low 16 bits of source 1 into one 16-bit element of a
+        // V128 (source 0) with PINSRW, writing the result to the destination.
+        //
+        // context:   code generation context that receives the instruction.
+        // operation: source 0 is the vector, source 1 the value and source 2
+        //            the constant element index, which must be in [0, 7].
+        //
+        // Without VEX encoding the destination must alias the source vector.
+        private static void GenerateVectorInsert16(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); //Vector
+            Operand src2 = operation.GetSource(1); //Value
+            Operand src3 = operation.GetSource(2); //Index
+
+            if (!HardwareCapabilities.SupportsVexEncoding)
+            {
+                EnsureSameReg(dest, src1);
+            }
+
+            Debug.Assert(src1.Type == OperandType.V128);
+            Debug.Assert(src3.Kind == OperandKind.Constant);
+
+            byte index = src3.AsByte();
+
+            // Same range check as GenerateVectorExtract16: a V128 holds 8 words.
+            Debug.Assert(index < 8);
+
+            context.Assembler.Pinsrw(dest, src1, src2, index);
+        }
+
+        // Inserts the low 8 bits of source 1 into one 8-bit element of a
+        // V128 (source 0) with PINSRB, writing the result to the destination.
+        //
+        // context:   code generation context that receives the instruction.
+        // operation: source 0 is the vector, source 1 the value and source 2
+        //            the constant element index, which must be in [0, 15].
+        //
+        // Without VEX encoding the destination must alias the source vector.
+        private static void GenerateVectorInsert8(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); //Vector
+            Operand src2 = operation.GetSource(1); //Value
+            Operand src3 = operation.GetSource(2); //Index
+
+            // It's not possible to emulate this instruction without
+            // SSE 4.1 support without the use of a temporary register,
+            // so we instead handle that case on the pre-allocator when
+            // SSE 4.1 is not supported on the CPU.
+            Debug.Assert(HardwareCapabilities.SupportsSse41);
+
+            if (!HardwareCapabilities.SupportsVexEncoding)
+            {
+                EnsureSameReg(dest, src1);
+            }
+
+            Debug.Assert(src1.Type == OperandType.V128);
+            Debug.Assert(src3.Kind == OperandKind.Constant);
+
+            byte index = src3.AsByte();
+
+            // Same range check as GenerateVectorExtract8: a V128 holds 16 bytes.
+            Debug.Assert(index < 16);
+
+            context.Assembler.Pinsrb(dest, src1, src2, index);
+        }
+
+        // Fills the (non-integer) destination with all-one bits by comparing
+        // the register with itself for equality (PCMPEQW).
+        private static void GenerateVectorOne(CodeGenContext context, Operation operation)
+        {
+            Operand vector = operation.Destination;
+
+            Debug.Assert(!vector.Type.IsInteger());
+
+            context.Assembler.Pcmpeqw(vector, vector, vector);
+        }
+
+        // Clears the (non-integer) destination by XORing the register with
+        // itself (XORPS).
+        private static void GenerateVectorZero(CodeGenContext context, Operation operation)
+        {
+            Operand vector = operation.Destination;
+
+            Debug.Assert(!vector.Type.IsInteger());
+
+            context.Assembler.Xorps(vector, vector, vector);
+        }
+
+        // Copies source 0 into the destination with the upper 64 bits
+        // cleared. Both operands must be V128.
+        private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand vector = operation.GetSource(0);
+
+            bool bothVectors = result.Type == OperandType.V128 && vector.Type == OperandType.V128;
+
+            Debug.Assert(bothVectors);
+
+            GenerateZeroUpper64(context, result, vector);
+        }
+
+        // Copies source 0 into the destination with the upper 96 bits
+        // cleared. Both operands must be V128.
+        private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation)
+        {
+            Operand result = operation.Destination;
+            Operand vector = operation.GetSource(0);
+
+            bool bothVectors = result.Type == OperandType.V128 && vector.Type == OperandType.V128;
+
+            Debug.Assert(bothVectors);
+
+            GenerateZeroUpper96(context, result, vector);
+        }
+
+        // Zero-extends the low 16 bits of source 0 into the destination.
+        // The MOVZX is always emitted with a 32-bit operand type, regardless
+        // of the destination type.
+        private static void GenerateZeroExtend16(CodeGenContext context, Operation operation)
+        {
+            Operand extended = operation.Destination;
+            Operand narrow   = operation.GetSource(0);
+
+            Debug.Assert(extended.Type.IsInteger() && narrow.Type.IsInteger());
+
+            context.Assembler.Movzx16(extended, narrow, OperandType.I32);
+        }
+
+        // Zero-extends the low 32 bits of source 0 into the destination by
+        // emitting a plain 32-bit MOV.
+        private static void GenerateZeroExtend32(CodeGenContext context, Operation operation)
+        {
+            Operand extended = operation.Destination;
+            Operand narrow   = operation.GetSource(0);
+
+            Debug.Assert(extended.Type.IsInteger() && narrow.Type.IsInteger());
+
+            context.Assembler.Mov(extended, narrow, OperandType.I32);
+        }
+
+        // Zero-extends the low 8 bits of source 0 into the destination.
+        // The MOVZX is always emitted with a 32-bit operand type, regardless
+        // of the destination type.
+        private static void GenerateZeroExtend8(CodeGenContext context, Operation operation)
+        {
+            Operand extended = operation.Destination;
+            Operand narrow   = operation.GetSource(0);
+
+            Debug.Assert(extended.Type.IsInteger() && narrow.Type.IsInteger());
+
+            context.Assembler.Movzx8(extended, narrow, OperandType.I32);
+        }
+
+        // Emits the load that matches the type of `value`.
+        //
+        // context: code generation context that receives the instruction.
+        // address: memory operand to read from.
+        // value:   destination operand; its type selects the instruction
+        //          (MOV for I32/I64, MOVD for FP32, MOVQ for FP64, MOVDQU for V128).
+        //
+        // Any other type trips a debug assertion that names the offending
+        // type; nothing is emitted for it.
+        private static void GenerateLoad(CodeGenContext context, Operand address, Operand value)
+        {
+            switch (value.Type)
+            {
+                case OperandType.I32:  context.Assembler.Mov   (value, address, OperandType.I32); break;
+                case OperandType.I64:  context.Assembler.Mov   (value, address, OperandType.I64); break;
+                case OperandType.FP32: context.Assembler.Movd  (value, address);                  break;
+                case OperandType.FP64: context.Assembler.Movq  (value, address);                  break;
+                case OperandType.V128: context.Assembler.Movdqu(value, address);                  break;
+
+                default: Debug.Assert(false, $"Invalid load operand type \"{value.Type}\"."); break;
+            }
+        }
+
+        // Emits the store that matches the type of `value`.
+        //
+        // context: code generation context that receives the instruction.
+        // address: memory operand to write to.
+        // value:   source operand; its type selects the instruction
+        //          (MOV for I32/I64, MOVD for FP32, MOVQ for FP64, MOVDQU for V128).
+        //
+        // Any other type trips a debug assertion that names the offending
+        // type; nothing is emitted for it.
+        private static void GenerateStore(CodeGenContext context, Operand address, Operand value)
+        {
+            switch (value.Type)
+            {
+                case OperandType.I32:  context.Assembler.Mov   (address, value, OperandType.I32); break;
+                case OperandType.I64:  context.Assembler.Mov   (address, value, OperandType.I64); break;
+                case OperandType.FP32: context.Assembler.Movd  (address, value);                  break;
+                case OperandType.FP64: context.Assembler.Movq  (address, value);                  break;
+                case OperandType.V128: context.Assembler.Movdqu(address, value);                  break;
+
+                default: Debug.Assert(false, $"Invalid store operand type \"{value.Type}\"."); break;
+            }
+        }
+
+        // Copies the low 64 bits of source into dest with a single MOVQ,
+        // which leaves the upper 64 bits of dest cleared.
+        private static void GenerateZeroUpper64(CodeGenContext context, Operand dest, Operand source)
+            => context.Assembler.Movq(dest, source);
+
+        // Copies the low 32 bits of source into dest and clears the rest.
+        // MOVQ first keeps the low 64 bits, then a PSHUFD with selector 0xfc
+        // keeps lane 0 and fills lanes 1-3 from lane 3.
+        private static void GenerateZeroUpper96(CodeGenContext context, Operand dest, Operand source)
+        {
+            const byte keepLane0 = 0xfc;
+
+            context.Assembler.Movq(dest, source);
+
+            context.Assembler.Pshufd(dest, dest, keepLane0);
+        }
+
+        // Debug-only sanity check for unary operations: the destination and
+        // the source must share both register and type.
+        private static void ValidateUnOp(Operand dest, Operand source)
+        {
+#if DEBUG
+            EnsureSameReg(dest, source);
+
+            Debug.Assert(dest.Type == source.Type);
+#endif
+        }
+
+        // Debug-only sanity check for binary operations: the destination must
+        // share its register with the first source, and all three operands
+        // must have the same type.
+        private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
+        {
+#if DEBUG
+            EnsureSameReg(dest, src1);
+
+            Debug.Assert(dest.Type == src1.Type);
+            Debug.Assert(dest.Type == src2.Type);
+#endif
+        }
+
+        // Debug-only sanity check for shifts and rotates: the destination
+        // must share register and type with the shifted value, be an integer,
+        // and the shift count must be a 32-bit integer.
+        private static void ValidateShift(Operand dest, Operand src1, Operand src2)
+        {
+#if DEBUG
+            EnsureSameReg(dest, src1);
+
+            Debug.Assert(dest.Type == src1.Type);
+
+            bool integerShift = dest.Type.IsInteger() && src2.Type == OperandType.I32;
+
+            Debug.Assert(integerShift);
+#endif
+        }
+
+        // Asserts that two operands refer to the same register or memory
+        // location. The check is skipped for non-integer operands when VEX
+        // encoding is available.
+        private static void EnsureSameReg(Operand op1, Operand op2)
+        {
+            bool checkNotNeeded = !op1.Type.IsInteger() && HardwareCapabilities.SupportsVexEncoding;
+
+            if (!checkNotNeeded)
+            {
+                Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory);
+                Debug.Assert(op1.Kind == op2.Kind);
+                Debug.Assert(op1.Value == op2.Value);
+            }
+        }
+
+        // Asserts that both operands have the same type.
+        private static void EnsureSameType(Operand op1, Operand op2)
+            => Debug.Assert(op1.Type == op2.Type);
+
+        // Asserts that the second and third operands have the type of the first.
+        private static void EnsureSameType(Operand op1, Operand op2, Operand op3)
+        {
+            OperandType expected = op1.Type;
+
+            Debug.Assert(expected == op2.Type);
+            Debug.Assert(expected == op3.Type);
+        }
+
+        // Asserts that the second, third and fourth operands have the type of
+        // the first.
+        private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4)
+        {
+            OperandType expected = op1.Type;
+
+            Debug.Assert(expected == op2.Type);
+            Debug.Assert(expected == op3.Type);
+            Debug.Assert(expected == op4.Type);
+        }
+
+        // Emits the function prologue and returns the matching unwind
+        // description.
+        //
+        // In order: pushes every callee saved integer register that the
+        // allocator used (lowest register index first), reserves the stack
+        // frame (call arguments + spill + XMM save regions, probing it first
+        // when it reaches StackGuardSize), then stores every used callee saved
+        // vector register at the top of that frame, walking downwards in
+        // 16 byte steps. Each save is recorded together with the stream offset
+        // right after its instruction.
+        private static UnwindInfo WritePrologue(CodeGenContext context)
+        {
+            List<UnwindPushEntry> unwindEntries = new List<UnwindPushEntry>();
+
+            Operand stackPointer = Register(X86Register.Rsp);
+
+            int pendingInts = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+
+            while (pendingInts != 0)
+            {
+                int regIndex = BitUtils.LowestBitSet(pendingInts);
+
+                pendingInts &= ~(1 << regIndex);
+
+                context.Assembler.Push(Register((X86Register)regIndex));
+
+                unwindEntries.Add(new UnwindPushEntry(regIndex, RegisterType.Integer, context.StreamOffset));
+            }
+
+            int frameSize = context.CallArgsRegionSize
+                          + context.AllocResult.SpillRegionSize
+                          + context.XmmSaveRegionSize;
+
+            if (frameSize >= StackGuardSize)
+            {
+                GenerateInlineStackProbe(context, frameSize);
+            }
+
+            if (frameSize != 0)
+            {
+                context.Assembler.Sub(stackPointer, new Operand(frameSize), OperandType.I64);
+            }
+
+            int saveOffset = frameSize;
+
+            int pendingVecs = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+            while (pendingVecs != 0)
+            {
+                int regIndex = BitUtils.LowestBitSet(pendingVecs);
+
+                pendingVecs &= ~(1 << regIndex);
+
+                saveOffset -= 16;
+
+                MemoryOperand saveSlot = new MemoryOperand(OperandType.V128, stackPointer, null, Multiplier.x1, saveOffset);
+
+                context.Assembler.Movdqu(saveSlot, Xmm((X86Register)regIndex));
+
+                unwindEntries.Add(new UnwindPushEntry(regIndex, RegisterType.Vector, context.StreamOffset));
+            }
+
+            return new UnwindInfo(unwindEntries.ToArray(), context.StreamOffset, frameSize);
+        }
+
+        // Emits the function epilogue, undoing WritePrologue: reloads the
+        // used callee saved vector registers from the top of the frame,
+        // releases the frame, then pops the used callee saved integer
+        // registers from the highest register index down (the reverse of the
+        // push order).
+        private static void WriteEpilogue(CodeGenContext context)
+        {
+            Operand stackPointer = Register(X86Register.Rsp);
+
+            int frameSize = context.CallArgsRegionSize
+                          + context.AllocResult.SpillRegionSize
+                          + context.XmmSaveRegionSize;
+
+            int saveOffset = frameSize;
+
+            int pendingVecs = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+            while (pendingVecs != 0)
+            {
+                int regIndex = BitUtils.LowestBitSet(pendingVecs);
+
+                pendingVecs &= ~(1 << regIndex);
+
+                saveOffset -= 16;
+
+                MemoryOperand saveSlot = new MemoryOperand(OperandType.V128, stackPointer, null, Multiplier.x1, saveOffset);
+
+                context.Assembler.Movdqu(Xmm((X86Register)regIndex), saveSlot);
+            }
+
+            if (frameSize != 0)
+            {
+                context.Assembler.Add(stackPointer, new Operand(frameSize), OperandType.I64);
+            }
+
+            int pendingInts = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+
+            while (pendingInts != 0)
+            {
+                int regIndex = BitUtils.HighestBitSet(pendingInts);
+
+                pendingInts &= ~(1 << regIndex);
+
+                context.Assembler.Pop(Register((X86Register)regIndex));
+            }
+        }
+
+        // Emits one dummy 32-bit read per page of an upcoming stack
+        // allocation, at [RSP - PageSize], [RSP - 2 * PageSize], ... for every
+        // offset below `size` rounded up to a multiple of PageSize.
+        //
+        // context: code generation context that receives the probe loads.
+        // size:    number of bytes that are about to be reserved on the stack.
+        //
+        // The loads target the integer return register, so whatever that
+        // register held before is overwritten.
+        private static void GenerateInlineStackProbe(CodeGenContext context, int size)
+        {
+            // Windows does lazy stack allocation, and there are just 2
+            // guard pages at the end of the stack. So, if the allocation
+            // we make is larger than this guard size, we must ensure that
+            // the OS maps all the pages that we'll use. We do that with a
+            // dummy read on each of those pages, forcing a page fault that
+            // makes the OS map them. If they are already mapped, nothing happens.
+            const int pageMask = PageSize - 1;
+
+            size = (size + pageMask) & ~pageMask;
+
+            Operand rsp  = Register(X86Register.Rsp);
+            Operand temp = Register(CallingConvention.GetIntReturnRegister());
+
+            for (int offset = PageSize; offset < size; offset += PageSize)
+            {
+                // Negative displacement: the stack grows towards lower addresses.
+                MemoryOperand memOp = new MemoryOperand(OperandType.I32, rsp, null, Multiplier.x1, -offset);
+
+                context.Assembler.Mov(temp, memOp, OperandType.I32);
+            }
+        }
+
+        // Returns a memory operand for `operand`.
+        //
+        // operand: either an existing memory operand, which is returned as is,
+        //          or an operand that is wrapped as the address of a new one.
+        // type:    type given to the new memory operand; ignored when
+        //          `operand` already is a memory operand.
+        //
+        // Throws InvalidCastException if an operand reports
+        // OperandKind.Memory but is not a MemoryOperand instance, instead of
+        // silently handing null to the caller.
+        private static MemoryOperand Memory(Operand operand, OperandType type)
+        {
+            if (operand.Kind == OperandKind.Memory)
+            {
+                // Direct cast: a kind/instance mismatch should fail here, not
+                // later as a null dereference in the assembler.
+                return (MemoryOperand)operand;
+            }
+
+            return new MemoryOperand(type, operand);
+        }
+
+        // Creates an integer register operand for the given x86 register.
+        // The operand type defaults to 64-bit.
+        private static Operand Register(X86Register register, OperandType type = OperandType.I64)
+            => new Operand((int)register, RegisterType.Integer, type);
+
+        // Creates a 128-bit vector register operand for the given x86
+        // register index.
+        private static Operand Xmm(X86Register register)
+            => new Operand((int)register, RegisterType.Vector, OperandType.V128);
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
new file mode 100644
index 000000000..7f930d6b9
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
@@ -0,0 +1,52 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+namespace ARMeilleure.CodeGen.X86
+{
+    static class HardwareCapabilities
+    {
+        private delegate ulong GetFeatureInfo();
+
+        // Result of the CpuId IR instruction, read once by the static constructor.
+        // The masks below use bits 0-31 for ECX flags and bits 32-63 for EDX flags.
+        private static readonly ulong _featureInfo;
+
+        // Feature flags reported in ECX.
+        public static bool SupportsSse3      => (_featureInfo & (1UL << 0))  != 0;
+        public static bool SupportsPclmulqdq => (_featureInfo & (1UL << 1))  != 0;
+        public static bool SupportsSsse3     => (_featureInfo & (1UL << 9))  != 0;
+        public static bool SupportsFma       => (_featureInfo & (1UL << 12)) != 0;
+        public static bool SupportsCx16      => (_featureInfo & (1UL << 13)) != 0;
+        public static bool SupportsSse41     => (_featureInfo & (1UL << 19)) != 0;
+        public static bool SupportsSse42     => (_featureInfo & (1UL << 20)) != 0;
+        public static bool SupportsPopcnt    => (_featureInfo & (1UL << 23)) != 0;
+        public static bool SupportsAesni     => (_featureInfo & (1UL << 25)) != 0;
+        public static bool SupportsAvx       => (_featureInfo & (1UL << 28)) != 0;
+        public static bool SupportsF16c      => (_featureInfo & (1UL << 29)) != 0;
+
+        // Feature flags reported in EDX; parenthesized because "+" binds tighter than "<<".
+        public static bool SupportsSse  => (_featureInfo & (1UL << (32 + 25))) != 0;
+        public static bool SupportsSse2 => (_featureInfo & (1UL << (32 + 26))) != 0;
+
+        // When set, SupportsVexEncoding is false even if the CPU reports AVX.
+        public static bool ForceLegacySse { get; set; }
+
+        public static bool SupportsVexEncoding => !ForceLegacySse && SupportsAvx;
+
+        static HardwareCapabilities()
+        {
+            // Compile a function that only returns the CpuId result, then call it once.
+            EmitterContext context = new EmitterContext();
+
+            context.Return(context.CpuId());
+
+            GetFeatureInfo getFeatureInfo = Compiler.Compile<GetFeatureInfo>(
+                context.GetControlFlowGraph(),
+                new OperandType[0],
+                OperandType.I64,
+                CompilerOptions.HighCq);
+
+            _featureInfo = getFeatureInfo();
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs b/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
new file mode 100644
index 000000000..b1af352bc
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.X86
+{
+    struct IntrinsicInfo
+    {
+        public X86Instruction Inst { get; } // x86 instruction associated with the intrinsic
+        public IntrinsicType  Type { get; } // kind of the intrinsic, one of the IntrinsicType values
+
+        public IntrinsicInfo(X86Instruction inst, IntrinsicType type)
+        {
+            Type = type;
+            Inst = inst;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
new file mode 100644
index 000000000..e225f2542
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -0,0 +1,160 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.X86
+{
+    // Lookup table that maps each x86 Intrinsic to its X86Instruction and IntrinsicType.
+    static class IntrinsicTable
+    {
+        // Indexed by the Intrinsic value; entries never passed to Add keep the default IntrinsicInfo.
+        private static readonly IntrinsicInfo[] _intrinTable;
+
+        static IntrinsicTable()
+        {
+            _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
+
+            Add(Intrinsic.X86Addpd,      new IntrinsicInfo(X86Instruction.Addpd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Addps,      new IntrinsicInfo(X86Instruction.Addps,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Addsd,      new IntrinsicInfo(X86Instruction.Addsd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Addss,      new IntrinsicInfo(X86Instruction.Addss,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Andnpd,     new IntrinsicInfo(X86Instruction.Andnpd,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Andnps,     new IntrinsicInfo(X86Instruction.Andnps,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Cmppd,      new IntrinsicInfo(X86Instruction.Cmppd,      IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Cmpps,      new IntrinsicInfo(X86Instruction.Cmpps,      IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Cmpsd,      new IntrinsicInfo(X86Instruction.Cmpsd,      IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Cmpss,      new IntrinsicInfo(X86Instruction.Cmpss,      IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Comisdeq,   new IntrinsicInfo(X86Instruction.Comisd,     IntrinsicType.Comis_));
+            Add(Intrinsic.X86Comisdge,   new IntrinsicInfo(X86Instruction.Comisd,     IntrinsicType.Comis_));
+            Add(Intrinsic.X86Comisdlt,   new IntrinsicInfo(X86Instruction.Comisd,     IntrinsicType.Comis_));
+            Add(Intrinsic.X86Comisseq,   new IntrinsicInfo(X86Instruction.Comiss,     IntrinsicType.Comis_));
+            Add(Intrinsic.X86Comissge,   new IntrinsicInfo(X86Instruction.Comiss,     IntrinsicType.Comis_));
+            Add(Intrinsic.X86Comisslt,   new IntrinsicInfo(X86Instruction.Comiss,     IntrinsicType.Comis_));
+            Add(Intrinsic.X86Cvtdq2pd,   new IntrinsicInfo(X86Instruction.Cvtdq2pd,   IntrinsicType.Unary));
+            Add(Intrinsic.X86Cvtdq2ps,   new IntrinsicInfo(X86Instruction.Cvtdq2ps,   IntrinsicType.Unary));
+            Add(Intrinsic.X86Cvtpd2dq,   new IntrinsicInfo(X86Instruction.Cvtpd2dq,   IntrinsicType.Unary));
+            Add(Intrinsic.X86Cvtpd2ps,   new IntrinsicInfo(X86Instruction.Cvtpd2ps,   IntrinsicType.Unary));
+            Add(Intrinsic.X86Cvtps2dq,   new IntrinsicInfo(X86Instruction.Cvtps2dq,   IntrinsicType.Unary));
+            Add(Intrinsic.X86Cvtps2pd,   new IntrinsicInfo(X86Instruction.Cvtps2pd,   IntrinsicType.Unary));
+            Add(Intrinsic.X86Cvtsd2si,   new IntrinsicInfo(X86Instruction.Cvtsd2si,   IntrinsicType.UnaryToGpr));
+            Add(Intrinsic.X86Cvtsd2ss,   new IntrinsicInfo(X86Instruction.Cvtsd2ss,   IntrinsicType.Binary));
+            Add(Intrinsic.X86Cvtss2sd,   new IntrinsicInfo(X86Instruction.Cvtss2sd,   IntrinsicType.Binary));
+            Add(Intrinsic.X86Divpd,      new IntrinsicInfo(X86Instruction.Divpd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Divps,      new IntrinsicInfo(X86Instruction.Divps,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Divsd,      new IntrinsicInfo(X86Instruction.Divsd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Divss,      new IntrinsicInfo(X86Instruction.Divss,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Haddpd,     new IntrinsicInfo(X86Instruction.Haddpd,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Haddps,     new IntrinsicInfo(X86Instruction.Haddps,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Maxpd,      new IntrinsicInfo(X86Instruction.Maxpd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Maxps,      new IntrinsicInfo(X86Instruction.Maxps,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Maxsd,      new IntrinsicInfo(X86Instruction.Maxsd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Maxss,      new IntrinsicInfo(X86Instruction.Maxss,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Minpd,      new IntrinsicInfo(X86Instruction.Minpd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Minps,      new IntrinsicInfo(X86Instruction.Minps,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Minsd,      new IntrinsicInfo(X86Instruction.Minsd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Minss,      new IntrinsicInfo(X86Instruction.Minss,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Movhlps,    new IntrinsicInfo(X86Instruction.Movhlps,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Movlhps,    new IntrinsicInfo(X86Instruction.Movlhps,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Mulpd,      new IntrinsicInfo(X86Instruction.Mulpd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Mulps,      new IntrinsicInfo(X86Instruction.Mulps,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Mulsd,      new IntrinsicInfo(X86Instruction.Mulsd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Mulss,      new IntrinsicInfo(X86Instruction.Mulss,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Paddb,      new IntrinsicInfo(X86Instruction.Paddb,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Paddd,      new IntrinsicInfo(X86Instruction.Paddd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Paddq,      new IntrinsicInfo(X86Instruction.Paddq,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Paddw,      new IntrinsicInfo(X86Instruction.Paddw,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Pand,       new IntrinsicInfo(X86Instruction.Pand,       IntrinsicType.Binary));
+            Add(Intrinsic.X86Pandn,      new IntrinsicInfo(X86Instruction.Pandn,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Pavgb,      new IntrinsicInfo(X86Instruction.Pavgb,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Pavgw,      new IntrinsicInfo(X86Instruction.Pavgw,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Pblendvb,   new IntrinsicInfo(X86Instruction.Pblendvb,   IntrinsicType.Ternary));
+            Add(Intrinsic.X86Pcmpeqb,    new IntrinsicInfo(X86Instruction.Pcmpeqb,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Pcmpeqd,    new IntrinsicInfo(X86Instruction.Pcmpeqd,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Pcmpeqq,    new IntrinsicInfo(X86Instruction.Pcmpeqq,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Pcmpeqw,    new IntrinsicInfo(X86Instruction.Pcmpeqw,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Pcmpgtb,    new IntrinsicInfo(X86Instruction.Pcmpgtb,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Pcmpgtd,    new IntrinsicInfo(X86Instruction.Pcmpgtd,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Pcmpgtq,    new IntrinsicInfo(X86Instruction.Pcmpgtq,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Pcmpgtw,    new IntrinsicInfo(X86Instruction.Pcmpgtw,    IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmaxsb,     new IntrinsicInfo(X86Instruction.Pmaxsb,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmaxsd,     new IntrinsicInfo(X86Instruction.Pmaxsd,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmaxsw,     new IntrinsicInfo(X86Instruction.Pmaxsw,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmaxub,     new IntrinsicInfo(X86Instruction.Pmaxub,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmaxud,     new IntrinsicInfo(X86Instruction.Pmaxud,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmaxuw,     new IntrinsicInfo(X86Instruction.Pmaxuw,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pminsb,     new IntrinsicInfo(X86Instruction.Pminsb,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pminsd,     new IntrinsicInfo(X86Instruction.Pminsd,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pminsw,     new IntrinsicInfo(X86Instruction.Pminsw,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pminub,     new IntrinsicInfo(X86Instruction.Pminub,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pminud,     new IntrinsicInfo(X86Instruction.Pminud,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pminuw,     new IntrinsicInfo(X86Instruction.Pminuw,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmovsxbw,   new IntrinsicInfo(X86Instruction.Pmovsxbw,   IntrinsicType.Unary));
+            Add(Intrinsic.X86Pmovsxdq,   new IntrinsicInfo(X86Instruction.Pmovsxdq,   IntrinsicType.Unary));
+            Add(Intrinsic.X86Pmovsxwd,   new IntrinsicInfo(X86Instruction.Pmovsxwd,   IntrinsicType.Unary));
+            Add(Intrinsic.X86Pmovzxbw,   new IntrinsicInfo(X86Instruction.Pmovzxbw,   IntrinsicType.Unary));
+            Add(Intrinsic.X86Pmovzxdq,   new IntrinsicInfo(X86Instruction.Pmovzxdq,   IntrinsicType.Unary));
+            Add(Intrinsic.X86Pmovzxwd,   new IntrinsicInfo(X86Instruction.Pmovzxwd,   IntrinsicType.Unary));
+            Add(Intrinsic.X86Pmulld,     new IntrinsicInfo(X86Instruction.Pmulld,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pmullw,     new IntrinsicInfo(X86Instruction.Pmullw,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Popcnt,     new IntrinsicInfo(X86Instruction.Popcnt,     IntrinsicType.PopCount));
+            Add(Intrinsic.X86Por,        new IntrinsicInfo(X86Instruction.Por,        IntrinsicType.Binary));
+            Add(Intrinsic.X86Pshufb,     new IntrinsicInfo(X86Instruction.Pshufb,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Pslld,      new IntrinsicInfo(X86Instruction.Pslld,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Pslldq,     new IntrinsicInfo(X86Instruction.Pslldq,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Psllq,      new IntrinsicInfo(X86Instruction.Psllq,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Psllw,      new IntrinsicInfo(X86Instruction.Psllw,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Psrad,      new IntrinsicInfo(X86Instruction.Psrad,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Psraw,      new IntrinsicInfo(X86Instruction.Psraw,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Psrld,      new IntrinsicInfo(X86Instruction.Psrld,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Psrldq,     new IntrinsicInfo(X86Instruction.Psrldq,     IntrinsicType.Binary));
+            Add(Intrinsic.X86Psrlq,      new IntrinsicInfo(X86Instruction.Psrlq,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Psrlw,      new IntrinsicInfo(X86Instruction.Psrlw,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Psubb,      new IntrinsicInfo(X86Instruction.Psubb,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Psubd,      new IntrinsicInfo(X86Instruction.Psubd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Psubq,      new IntrinsicInfo(X86Instruction.Psubq,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Psubw,      new IntrinsicInfo(X86Instruction.Psubw,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpckhbw,  new IntrinsicInfo(X86Instruction.Punpckhbw,  IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpckhdq,  new IntrinsicInfo(X86Instruction.Punpckhdq,  IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpckhqdq, new IntrinsicInfo(X86Instruction.Punpckhqdq, IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpckhwd,  new IntrinsicInfo(X86Instruction.Punpckhwd,  IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpcklbw,  new IntrinsicInfo(X86Instruction.Punpcklbw,  IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpckldq,  new IntrinsicInfo(X86Instruction.Punpckldq,  IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpcklqdq, new IntrinsicInfo(X86Instruction.Punpcklqdq, IntrinsicType.Binary));
+            Add(Intrinsic.X86Punpcklwd,  new IntrinsicInfo(X86Instruction.Punpcklwd,  IntrinsicType.Binary));
+            Add(Intrinsic.X86Pxor,       new IntrinsicInfo(X86Instruction.Pxor,       IntrinsicType.Binary));
+            Add(Intrinsic.X86Rcpps,      new IntrinsicInfo(X86Instruction.Rcpps,      IntrinsicType.Unary));
+            Add(Intrinsic.X86Rcpss,      new IntrinsicInfo(X86Instruction.Rcpss,      IntrinsicType.Unary));
+            Add(Intrinsic.X86Roundpd,    new IntrinsicInfo(X86Instruction.Roundpd,    IntrinsicType.BinaryImm));
+            Add(Intrinsic.X86Roundps,    new IntrinsicInfo(X86Instruction.Roundps,    IntrinsicType.BinaryImm));
+            Add(Intrinsic.X86Roundsd,    new IntrinsicInfo(X86Instruction.Roundsd,    IntrinsicType.BinaryImm));
+            Add(Intrinsic.X86Roundss,    new IntrinsicInfo(X86Instruction.Roundss,    IntrinsicType.BinaryImm));
+            Add(Intrinsic.X86Rsqrtps,    new IntrinsicInfo(X86Instruction.Rsqrtps,    IntrinsicType.Unary));
+            Add(Intrinsic.X86Rsqrtss,    new IntrinsicInfo(X86Instruction.Rsqrtss,    IntrinsicType.Unary));
+            Add(Intrinsic.X86Shufpd,     new IntrinsicInfo(X86Instruction.Shufpd,     IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Shufps,     new IntrinsicInfo(X86Instruction.Shufps,     IntrinsicType.TernaryImm));
+            Add(Intrinsic.X86Sqrtpd,     new IntrinsicInfo(X86Instruction.Sqrtpd,     IntrinsicType.Unary));
+            Add(Intrinsic.X86Sqrtps,     new IntrinsicInfo(X86Instruction.Sqrtps,     IntrinsicType.Unary));
+            Add(Intrinsic.X86Sqrtsd,     new IntrinsicInfo(X86Instruction.Sqrtsd,     IntrinsicType.Unary));
+            Add(Intrinsic.X86Sqrtss,     new IntrinsicInfo(X86Instruction.Sqrtss,     IntrinsicType.Unary));
+            Add(Intrinsic.X86Subpd,      new IntrinsicInfo(X86Instruction.Subpd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Subps,      new IntrinsicInfo(X86Instruction.Subps,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Subsd,      new IntrinsicInfo(X86Instruction.Subsd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Subss,      new IntrinsicInfo(X86Instruction.Subss,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Unpckhpd,   new IntrinsicInfo(X86Instruction.Unpckhpd,   IntrinsicType.Binary));
+            Add(Intrinsic.X86Unpckhps,   new IntrinsicInfo(X86Instruction.Unpckhps,   IntrinsicType.Binary));
+            Add(Intrinsic.X86Unpcklpd,   new IntrinsicInfo(X86Instruction.Unpcklpd,   IntrinsicType.Binary));
+            Add(Intrinsic.X86Unpcklps,   new IntrinsicInfo(X86Instruction.Unpcklps,   IntrinsicType.Binary));
+            Add(Intrinsic.X86Xorpd,      new IntrinsicInfo(X86Instruction.Xorpd,      IntrinsicType.Binary));
+            Add(Intrinsic.X86Xorps,      new IntrinsicInfo(X86Instruction.Xorps,      IntrinsicType.Binary));
+        }
+
+        private static void Add(Intrinsic intrin, IntrinsicInfo info)
+        {
+            _intrinTable[(int)intrin] = info;
+        }
+
+        public static IntrinsicInfo GetInfo(Intrinsic intrin)
+        {
+            return _intrinTable[(int)intrin];
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicType.cs b/ARMeilleure/CodeGen/X86/IntrinsicType.cs
new file mode 100644
index 000000000..4e9b33e1e
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/IntrinsicType.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.X86
+{
+    enum IntrinsicType
+    {
+        Comis_,     // assigned to the Comisd / Comiss compare intrinsics
+        PopCount,   // assigned to Popcnt
+        Unary,
+        UnaryToGpr, // assigned to Cvtsd2si
+        Binary,
+        BinaryImm,  // assigned to the Round* intrinsics
+        Ternary,    // assigned to Pblendvb
+        TernaryImm, // assigned to the Cmp* and Shuf* intrinsics
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs
new file mode 100644
index 000000000..a14901311
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs
@@ -0,0 +1,1280 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.CodeGen.X86
+{
+    using LLNode = LinkedListNode<Node>;
+
+    static class PreAllocator
+    {
+        // Walks every operation of every block in cctx.Cfg and rewrites it to respect the x86
+        // operand restrictions and the current calling convention (Windows or System V).
+        // maxCallArgs receives the largest argument count among the calls found, or -1 without calls.
+        public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
+        {
+            maxCallArgs = -1;
+
+            // Anything that is not the Windows ABI is handled as System V.
+            bool isWindowsAbi = CallingConvention.GetCurrentCallConv() == CallConvName.Windows;
+
+            Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
+
+            foreach (BasicBlock block in cctx.Cfg.Blocks)
+            {
+                LLNode nextNode;
+
+                for (LLNode node = block.Operations.First; node != null; node = nextNode)
+                {
+                    // Captured before the handlers run, so nodes they insert after this one are not visited.
+                    nextNode = node.Next;
+
+                    if (!(node.Value is Operation operation))
+                    {
+                        continue;
+                    }
+
+                    HandleConstantCopy(node, operation);
+                    HandleSameDestSrc1Copy(node, operation);
+                    HandleFixedRegisterCopy(node, operation);
+
+                    switch (operation.Instruction)
+                    {
+                        case Instruction.Call:
+                            // Track the maximum number of arguments used on a call.
+                            // On Windows, when a struct is returned from the call,
+                            // we also need to pass the pointer where the struct
+                            // should be written on the first argument.
+                            int argsCount = operation.SourcesCount - 1;
+
+                            if (operation.Destination != null && operation.Destination.Type == OperandType.V128)
+                            {
+                                argsCount++;
+                            }
+
+                            maxCallArgs = maxCallArgs < argsCount ? argsCount : maxCallArgs;
+
+                            // Copy values to registers expected by the function
+                            // being called, as mandated by the ABI.
+                            if (isWindowsAbi)
+                            {
+                                node = HandleCallWindowsAbi(stackAlloc, node, operation);
+                            }
+                            else
+                            {
+                                node = HandleCallSystemVAbi(node, operation);
+                            }
+                            break;
+
+                        case Instruction.ConvertToFPUI:
+                            HandleConvertToFPUI(node, operation);
+                            break;
+
+                        case Instruction.LoadArgument:
+                            if (isWindowsAbi)
+                            {
+                                HandleLoadArgumentWindowsAbi(cctx, node, preservedArgs, operation);
+                            }
+                            else
+                            {
+                                HandleLoadArgumentSystemVAbi(cctx, node, preservedArgs, operation);
+                            }
+                            break;
+
+                        case Instruction.Negate:
+                            if (!operation.GetSource(0).Type.IsInteger())
+                            {
+                                node = HandleNegate(node, operation);
+                            }
+                            break;
+
+                        case Instruction.Return:
+                            if (isWindowsAbi)
+                            {
+                                HandleReturnWindowsAbi(cctx, node, preservedArgs, operation);
+                            }
+                            else
+                            {
+                                HandleReturnSystemVAbi(node, operation);
+                            }
+                            break;
+
+                        case Instruction.VectorInsert8:
+                            if (!HardwareCapabilities.SupportsSse41)
+                            {
+                                node = HandleVectorInsert8(node, operation);
+                            }
+                            break;
+                    }
+                }
+            }
+        }
+
+        // Makes sure the first two sources of an operation are not constants the instruction
+        // cannot take: non-integer constants are replaced through AddXmmCopy, and integer
+        // constants through AddCopy unless HasConstSrc1 / HasConstSrc2 (and IsLongConst for
+        // operand 2) say the constant can stay. Intrinsic operations are left untouched.
+        private static void HandleConstantCopy(LLNode node, Operation operation)
+        {
+            if (operation.SourcesCount == 0 || IsIntrinsic(operation.Instruction))
+            {
+                return;
+            }
+
+            Instruction inst = operation.Instruction;
+
+            // Operand 1.
+            Operand src1 = operation.GetSource(0);
+
+            if (src1.Kind == OperandKind.Constant)
+            {
+                if (!src1.Type.IsInteger())
+                {
+                    // Handle non-integer types (FP32, FP64 and V128).
+                    // For instructions without an immediate operand, we do the following:
+                    // - Insert a copy with the constant value (as integer) to a GPR.
+                    // - Insert a copy from the GPR to a XMM register.
+                    // - Replace the constant use with the XMM register.
+                    operation.SetSource(0, AddXmmCopy(node, src1));
+                }
+                else if (!HasConstSrc1(inst))
+                {
+                    // Handle integer types.
+                    // Most ALU instructions accept a 32-bits immediate on the second operand.
+                    // We need to ensure the following:
+                    // - If the constant is on operand 1, we need to move it.
+                    // -- But first, we try to swap operand 1 and 2 if the instruction is commutative.
+                    // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+                    // - If the constant is on operand 2, we check if the instruction supports it,
+                    // if not, we also add a copy. 64-bits constants are usually not supported.
+                    if (IsCommutative(inst))
+                    {
+                        // Swap the operands, so that the constant ends up on operand 2.
+                        Operand constant = src1;
+
+                        src1 = operation.GetSource(1);
+
+                        operation.SetSource(0, src1);
+                        operation.SetSource(1, constant);
+                    }
+
+                    // Not swapped, or the other operand is a constant as well.
+                    if (src1.Kind == OperandKind.Constant)
+                    {
+                        operation.SetSource(0, AddCopy(node, src1));
+                    }
+                }
+            }
+
+            // Operand 2, when there is one.
+            if (operation.SourcesCount < 2)
+            {
+                return;
+            }
+
+            Operand src2 = operation.GetSource(1);
+
+            if (src2.Kind != OperandKind.Constant)
+            {
+                return;
+            }
+
+            if (!src2.Type.IsInteger())
+            {
+                // Same replacement as for a non-integer constant on operand 1.
+                operation.SetSource(1, AddXmmCopy(node, src2));
+            }
+            else if (!HasConstSrc2(inst) || IsLongConst(src2))
+            {
+                // A constant is not accepted here, or IsLongConst flags this one.
+                operation.SetSource(1, AddCopy(node, src2));
+            }
+        }
+
+        private static LLNode HandleFixedRegisterCopy(LLNode node, Operation operation)
+        {
+            Operand dest = operation.Destination;
+
+            LinkedList<Node> nodes = node.List;
+
+            switch (operation.Instruction)
+            {
+                case Instruction.CompareAndSwap128:
+                {
+                    // Handle the many restrictions of the compare and exchange (16 bytes) instruction:
+                    // - The expected value should be in RDX:RAX.
+                    // - The new value to be written should be in RCX:RBX.
+                    // - The value at the memory location is loaded to RDX:RAX.
+                    void SplitOperand(Operand source, Operand lr, Operand hr)
+                    {
+                        nodes.AddBefore(node, new Operation(Instruction.VectorExtract, lr, source, Const(0)));
+                        nodes.AddBefore(node, new Operation(Instruction.VectorExtract, hr, source, Const(1)));
+                    }
+
+                    Operand rax = Gpr(X86Register.Rax, OperandType.I64);
+                    Operand rbx = Gpr(X86Register.Rbx, OperandType.I64);
+                    Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
+                    Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);
+
+                    SplitOperand(operation.GetSource(1), rax, rdx);
+                    SplitOperand(operation.GetSource(2), rbx, rcx);
+
+                    node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, rax));
+                    node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert,       dest, dest, rdx, Const(1)));
+
+                    operation.SetDestinations(new Operand[] { rdx, rax });
+
+                    operation.SetSources(new Operand[] { operation.GetSource(0), rdx, rax, rcx, rbx });
+
+                    break;
+                }
+
+                case Instruction.CpuId:
+                {
+                    // Handle the many restrictions of the CPU Id instruction:
+                    // - EAX controls the information returned by this instruction.
+                    // - When EAX is 1, feature information is returned.
+                    // - The information is written to registers EAX, EBX, ECX and EDX.
+                    Debug.Assert(dest.Type == OperandType.I64);
+
+                    Operand eax = Gpr(X86Register.Rax, OperandType.I32);
+                    Operand ebx = Gpr(X86Register.Rbx, OperandType.I32);
+                    Operand ecx = Gpr(X86Register.Rcx, OperandType.I32);
+                    Operand edx = Gpr(X86Register.Rdx, OperandType.I32);
+
+                    // Value 0x01 = Version, family and feature information.
+                    nodes.AddBefore(node, new Operation(Instruction.Copy, eax, Const(1)));
+
+                    // Copy results to the destination register.
+                    // The values are split into 2 32-bits registers, we merge them
+                    // into a single 64-bits register.
+                    Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
+
+                    node = nodes.AddAfter(node, new Operation(Instruction.ZeroExtend32, dest, edx));
+                    node = nodes.AddAfter(node, new Operation(Instruction.ShiftLeft,    dest, dest, Const(32)));
+                    node = nodes.AddAfter(node, new Operation(Instruction.BitwiseOr,    dest, dest, rcx));
+
+                    operation.SetDestinations(new Operand[] { eax, ebx, ecx, edx });
+
+                    operation.SetSources(new Operand[] { eax });
+
+                    break;
+                }
+
+                case Instruction.Divide:
+                case Instruction.DivideUI:
+                {
+                    // Handle the many restrictions of the division instructions:
+                    // - The dividend is always in RDX:RAX.
+                    // - The result is always in RAX.
+                    // - Additionally it also writes the remainder in RDX.
+                    if (dest.Type.IsInteger())
+                    {
+                        Operand src1 = operation.GetSource(0);
+
+                        Operand rax = Gpr(X86Register.Rax, src1.Type);
+                        Operand rdx = Gpr(X86Register.Rdx, src1.Type);
+
+                        nodes.AddBefore(node, new Operation(Instruction.Copy,    rax, src1));
+                        nodes.AddBefore(node, new Operation(Instruction.Clobber, rdx));
+
+                        node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, rax));
+
+                        operation.SetDestinations(new Operand[] { rdx, rax });
+
+                        operation.SetSources(new Operand[] { rdx, rax, operation.GetSource(1) });
+
+                        operation.Destination = rax;
+                    }
+
+                    break;
+                }
+
+                case Instruction.Extended:
+                {
+                    IntrinsicOperation intrinOp = (IntrinsicOperation)operation;
+
+                    // PBLENDVB last operand is always implied to be XMM0 when VEX is not supported.
+                    if (intrinOp.Intrinsic == Intrinsic.X86Pblendvb && !HardwareCapabilities.SupportsVexEncoding)
+                    {
+                        Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
+
+                        nodes.AddBefore(node, new Operation(Instruction.Copy, xmm0, operation.GetSource(2)));
+
+                        operation.SetSource(2, xmm0);
+                    }
+
+                    break;
+                }
+
+                case Instruction.Multiply64HighSI:
+                case Instruction.Multiply64HighUI:
+                {
+                    // Handle the many restrictions of the i64 * i64 = i128 multiply instructions:
+                    // - The multiplicand is always in RAX.
+                    // - The lower 64-bits of the result is always in RAX.
+                    // - The higher 64-bits of the result is always in RDX.
+                    Operand src1 = operation.GetSource(0);
+
+                    Operand rax = Gpr(X86Register.Rax, src1.Type);
+                    Operand rdx = Gpr(X86Register.Rdx, src1.Type);
+
+                    nodes.AddBefore(node, new Operation(Instruction.Copy, rax, src1));
+
+                    operation.SetSource(0, rax);
+
+                    node = nodes.AddAfter(node, new Operation(Instruction.Copy, dest, rdx));
+
+                    operation.SetDestinations(new Operand[] { rdx, rax });
+
+                    break;
+                }
+
+                case Instruction.RotateRight:
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+                {
+                    // The shift register is always implied to be CL (low 8-bits of RCX or ECX).
+                    if (operation.GetSource(1).Kind == OperandKind.LocalVariable)
+                    {
+                        Operand rcx = Gpr(X86Register.Rcx, OperandType.I32);
+
+                        nodes.AddBefore(node, new Operation(Instruction.Copy, rcx, operation.GetSource(1)));
+
+                        operation.SetSource(1, rcx);
+                    }
+
+                    break;
+                }
+            }
+
+            return node;
+        }
+
+        // Inserts the copies required by operations whose destination must be the same
+        // operand as their first source. ConditionalSelect is handled here as well, but
+        // for it the operand tied to the destination is the third source.
+        // Returns the last node inserted after the operation, or the node itself.
+        private static LLNode HandleSameDestSrc1Copy(LLNode node, Operation operation)
+        {
+            // Operations without a destination or without sources need no fix-up.
+            if (operation.Destination == null || operation.SourcesCount == 0)
+            {
+                return node;
+            }
+
+            Instruction instruction = operation.Instruction;
+
+            Operand target   = operation.Destination;
+            Operand firstSrc = operation.GetSource(0);
+
+            LinkedList<Node> list = node.List;
+
+            // Multiply maps to IMUL, which also has a three operand form where the
+            // second source is an immediate value; that form needs no copy.
+            bool isThreeOperandMul = instruction == Instruction.Multiply &&
+                                     operation.GetSource(1).Kind == OperandKind.Constant;
+
+            bool tiesDestToSrc1 = IsSameOperandDestSrc1(operation) &&
+                                  firstSrc.Kind == OperandKind.LocalVariable &&
+                                  !isThreeOperandMul;
+
+            if (tiesDestToSrc1)
+            {
+                // Find out if the destination is also read through one of the other sources.
+                bool destIsOtherSource = false;
+
+                int srcIndex = 1;
+
+                while (!destIsOtherSource && srcIndex < operation.SourcesCount)
+                {
+                    destIsOtherSource = operation.GetSource(srcIndex++) == target;
+                }
+
+                if (!destIsOtherSource)
+                {
+                    // Simple case: copy the first source into the destination and
+                    // make the operation work in place on the destination.
+                    list.AddBefore(node, new Operation(Instruction.Copy, target, firstSrc));
+
+                    operation.SetSource(0, target);
+
+                    return node;
+                }
+
+                // Writing the destination early would destroy a value that is still
+                // needed as a source, so the work is done on a scratch local that is
+                // copied to the destination once the operation is complete.
+                Operand scratch = Local(target.Type);
+
+                list.AddBefore(node, new Operation(Instruction.Copy, scratch, firstSrc));
+
+                operation.SetSource(0, scratch);
+
+                LLNode copyBack = list.AddAfter(node, new Operation(Instruction.Copy, target, scratch));
+
+                operation.Destination = scratch;
+
+                return copyBack;
+            }
+
+            if (instruction != Instruction.ConditionalSelect)
+            {
+                return node;
+            }
+
+            Operand secondSrc = operation.GetSource(1);
+            Operand thirdSrc  = operation.GetSource(2);
+
+            if (firstSrc != target && secondSrc != target)
+            {
+                // The destination is not read by the other sources, so it can
+                // directly receive the third source before the select.
+                list.AddBefore(node, new Operation(Instruction.Copy, target, thirdSrc));
+
+                operation.SetSource(2, target);
+
+                return node;
+            }
+
+            // The destination is still read as the first or second source,
+            // so the select has to operate on a scratch local instead.
+            Operand selectScratch = Local(target.Type);
+
+            list.AddBefore(node, new Operation(Instruction.Copy, selectScratch, thirdSrc));
+
+            operation.SetSource(2, selectScratch);
+
+            LLNode selectCopyBack = list.AddAfter(node, new Operation(Instruction.Copy, target, selectScratch));
+
+            operation.Destination = selectScratch;
+
+            return selectCopyBack;
+        }
+
+        // Replaces an unsigned integer to FP conversion with an equivalent sequence built
+        // from signed conversions, since the unsigned variant is not supported on X86.
+        // The original operation is removed; the last inserted node is returned.
+        private static LLNode HandleConvertToFPUI(LLNode node, Operation operation)
+        {
+            Operand dest   = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(source.Type.IsInteger(), $"Invalid source type \"{source.Type}\".");
+
+            LinkedList<Node> list = node.List;
+
+            LLNode replacedNode = node;
+
+            // Appends an operation right after the last one that was emitted.
+            void Append(Operation op)
+            {
+                node = list.AddAfter(node, op);
+            }
+
+            if (source.Type != OperandType.I32)
+            {
+                // 64-bits source. A signed conversion is only valid when the most
+                // significant bit is clear, so the sequence is:
+                // - Shift the value right by 1 (halving it), which clears that bit
+                //   but drops the least significant bit.
+                // - Convert the halved value using the signed conversion.
+                // - Double the converted value by adding it to itself.
+                // - Convert the dropped least significant bit on its own and add it.
+                Operand lowBit = Local(OperandType.I64);
+                Operand halved = Local(OperandType.I64);
+
+                Operand lowBitFP = Local(dest.Type);
+
+                Append(new Operation(Instruction.Copy, lowBit, source));
+                Append(new Operation(Instruction.Copy, halved, source));
+
+                Append(new Operation(Instruction.BitwiseAnd,   lowBit, lowBit, Const(1L)));
+                Append(new Operation(Instruction.ShiftRightUI, halved, halved, Const(1)));
+
+                Append(new Operation(Instruction.ConvertToFP, lowBitFP, lowBit));
+                Append(new Operation(Instruction.ConvertToFP, dest,     halved));
+
+                Append(new Operation(Instruction.Add, dest, dest, dest));
+                Append(new Operation(Instruction.Add, dest, dest, lowBitFP));
+            }
+            else
+            {
+                // 32-bits source. Zero-extending to 64-bits gives a value that the
+                // 64-bits signed conversion handles directly.
+                Operand extended = Local(OperandType.I64);
+
+                Append(new Operation(Instruction.ZeroExtend32, extended, source));
+                Append(new Operation(Instruction.ConvertToFP,  dest,     extended));
+            }
+
+            Delete(replacedNode, operation);
+
+            return node;
+        }
+
+        // Replaces a FP negate with a XOR against a mask that only has the highest bit
+        // set, as SSE has no FP negate instruction. Negating 0 this way produces -0.
+        // The original operation is removed; the last inserted node is returned.
+        private static LLNode HandleNegate(LLNode node, Operation operation)
+        {
+            Operand dest  = operation.Destination;
+            Operand value = operation.GetSource(0);
+
+            Debug.Assert(dest.Type == OperandType.FP32 ||
+                         dest.Type == OperandType.FP64, $"Invalid destination type \"{dest.Type}\".");
+
+            LinkedList<Node> list = node.List;
+
+            LLNode replacedNode = node;
+
+            // Appends an operation right after the last one that was emitted.
+            void Append(Operation op)
+            {
+                node = list.AddAfter(node, op);
+            }
+
+            Operand mask = Local(dest.Type);
+
+            // The mask starts from VectorOne and is shifted left so that only the
+            // highest bit of each element (bit 31 for FP32, bit 63 for FP64) remains.
+            bool isSingle = dest.Type == OperandType.FP32;
+
+            Intrinsic shiftLeft = isSingle ? Intrinsic.X86Pslld : Intrinsic.X86Psllq;
+
+            int highestBit = isSingle ? 31 : 63;
+
+            Append(new Operation(Instruction.VectorOne, mask));
+
+            Append(new IntrinsicOperation(shiftLeft, mask, mask, Const(highestBit)));
+
+            Append(new IntrinsicOperation(Intrinsic.X86Xorps, mask, mask, value));
+
+            Append(new Operation(Instruction.Copy, dest, mask));
+
+            Delete(replacedNode, operation);
+
+            return node;
+        }
+
+        // Emulates a 8-bits vector element insertion (used when SSE 4.1 is not supported)
+        // by extracting the 16-bits element that contains the byte, merging the new byte
+        // into it, and inserting the 16-bits element back.
+        // The original operation is removed; the last inserted node is returned.
+        private static LLNode HandleVectorInsert8(LLNode node, Operation operation)
+        {
+            Operand dest     = operation.Destination;
+            Operand vector   = operation.GetSource(0);
+            Operand value    = operation.GetSource(1);
+            Operand indexOpr = operation.GetSource(2);
+
+            Debug.Assert(indexOpr.Kind == OperandKind.Constant);
+
+            byte index = indexOpr.AsByte();
+
+            Debug.Assert(index < 16);
+
+            LinkedList<Node> list = node.List;
+
+            LLNode replacedNode = node;
+
+            // Appends an operation right after the last one that was emitted.
+            void Append(Operation op)
+            {
+                node = list.AddAfter(node, op);
+            }
+
+            Operand element = Local(OperandType.I32);
+            Operand newByte = Local(OperandType.I32);
+
+            Append(new Operation(Instruction.Copy, newByte, value));
+
+            // Two bytes share each 16-bits element, so the element index is half the byte index.
+            Append(new Operation(Instruction.VectorExtract16, element, vector, Const(index >> 1)));
+
+            if ((index & 1) == 0)
+            {
+                // Even index: the new byte replaces the low byte, the high byte is kept.
+                Append(new Operation(Instruction.ZeroExtend8, newByte, newByte));
+                Append(new Operation(Instruction.BitwiseAnd,  element, element, Const(0xff00)));
+                Append(new Operation(Instruction.BitwiseOr,   element, element, newByte));
+            }
+            else
+            {
+                // Odd index: the new byte replaces the high byte, the low byte is kept.
+                Append(new Operation(Instruction.ZeroExtend8, element, element));
+                Append(new Operation(Instruction.ShiftLeft,   newByte, newByte, Const(8)));
+                Append(new Operation(Instruction.BitwiseOr,   element, element, newByte));
+            }
+
+            Append(new Operation(Instruction.VectorInsert16, dest, vector, element, Const(index >> 1)));
+
+            Delete(replacedNode, operation);
+
+            return node;
+        }
+
+        // Rewrites a call operation for the Windows ABI: arguments are copied to the
+        // argument registers or spilled to the stack, V128 values are passed and
+        // returned through stack memory, and the result is copied out of the return
+        // register. Source 0 of the operation is kept as the first source of the call.
+        // Returns the last node inserted after the call, or the call node itself.
+        private static LLNode HandleCallWindowsAbi(StackAllocator stackAlloc, LLNode node, Operation operation)
+        {
+            Operand dest = operation.Destination;
+
+            LinkedList<Node> nodes = node.List;
+
+            // Handle struct arguments.
+            // retArgs is the number of argument positions used to pass the address
+            // of the returned value (1 when the call returns V128, 0 otherwise).
+            int retArgs = 0;
+
+            int stackAllocOffset = 0;
+
+            // Returns the offset of a region with "size" bytes for this call,
+            // growing the stack allocator only when it is not big enough yet.
+            int AllocateOnStack(int size)
+            {
+                // We assume that the stack allocator is initially empty (TotalSize = 0).
+                // Taking that into account, we can reuse the space allocated for other
+                // calls by keeping track of our own allocated size (stackAllocOffset).
+                // If the space allocated is not big enough, then we just expand it.
+                int offset = stackAllocOffset;
+
+                if (stackAllocOffset + size > stackAlloc.TotalSize)
+                {
+                    stackAlloc.Allocate((stackAllocOffset + size) - stackAlloc.TotalSize);
+                }
+
+                stackAllocOffset += size;
+
+                return offset;
+            }
+
+            Operand arg0Reg = null;
+
+            // A V128 result is returned through memory: stack space is reserved for it,
+            // and its address is placed on the first integer argument register.
+            if (dest != null && dest.Type == OperandType.V128)
+            {
+                int stackOffset = AllocateOnStack(dest.Type.GetSizeInBytes());
+
+                arg0Reg = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
+
+                Operation allocOp = new Operation(Instruction.StackAlloc, arg0Reg, Const(stackOffset));
+
+                nodes.AddBefore(node, allocOp);
+
+                retArgs = 1;
+            }
+
+            int argsCount = operation.SourcesCount - 1;
+
+            int maxArgs = CallingConvention.GetArgumentsOnRegsCount() - retArgs;
+
+            // From here on, argsCount is the number of arguments passed on registers.
+            if (argsCount > maxArgs)
+            {
+                argsCount = maxArgs;
+            }
+
+            // New source list of the call: the original source 0, the return value
+            // address register (if any), and then the argument registers.
+            Operand[] sources = new Operand[1 + retArgs + argsCount];
+
+            sources[0] = operation.GetSource(0);
+
+            if (arg0Reg != null)
+            {
+                sources[1] = arg0Reg;
+            }
+
+            // V128 arguments are stored on the stack, and the source on the operation
+            // is replaced with the address where the value was stored.
+            for (int index = 1; index < operation.SourcesCount; index++)
+            {
+                Operand source = operation.GetSource(index);
+
+                if (source.Type == OperandType.V128)
+                {
+                    Operand stackAddr = Local(OperandType.I64);
+
+                    int stackOffset = AllocateOnStack(source.Type.GetSizeInBytes());
+
+                    nodes.AddBefore(node, new Operation(Instruction.StackAlloc, stackAddr, Const(stackOffset)));
+
+                    Operation storeOp = new Operation(Instruction.Store, null, stackAddr, source);
+
+                    HandleConstantCopy(nodes.AddBefore(node, storeOp), storeOp);
+
+                    operation.SetSource(index, stackAddr);
+                }
+            }
+
+            // Handle arguments passed on registers.
+            for (int index = 0; index < argsCount; index++)
+            {
+                Operand source = operation.GetSource(index + 1);
+
+                Operand argReg;
+
+                // The integer and vector argument registers are both selected with the
+                // same position, which also counts the return value address (retArgs).
+                int argIndex = index + retArgs;
+
+                if (source.Type.IsInteger())
+                {
+                    argReg = Gpr(CallingConvention.GetIntArgumentRegister(argIndex), source.Type);
+                }
+                else
+                {
+                    argReg = Xmm(CallingConvention.GetVecArgumentRegister(argIndex), source.Type);
+                }
+
+                Operation copyOp = new Operation(Instruction.Copy, argReg, source);
+
+                HandleConstantCopy(nodes.AddBefore(node, copyOp), copyOp);
+
+                sources[1 + retArgs + index] = argReg;
+            }
+
+            // The remaining arguments (those that are not passed on registers)
+            // should be passed on the stack, we write them to the stack with "SpillArg".
+            // Each argument position uses 8 bytes on the offset passed to "SpillArg".
+            for (int index = argsCount; index < operation.SourcesCount - 1; index++)
+            {
+                Operand source = operation.GetSource(index + 1);
+
+                Operand offset = new Operand((index + retArgs) * 8);
+
+                Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source);
+
+                HandleConstantCopy(nodes.AddBefore(node, spillOp), spillOp);
+            }
+
+            if (dest != null)
+            {
+                if (dest.Type == OperandType.V128)
+                {
+                    // The address of the returned value is saved on a local before the
+                    // call, and the value is loaded from that address after the call.
+                    // The call itself is left without a destination.
+                    Operand retValueAddr = Local(OperandType.I64);
+
+                    nodes.AddBefore(node, new Operation(Instruction.Copy, retValueAddr, arg0Reg));
+
+                    Operation loadOp = new Operation(Instruction.Load, dest, retValueAddr);
+
+                    node = nodes.AddAfter(node, loadOp);
+
+                    operation.Destination = null;
+                }
+                else
+                {
+                    // The call now writes the return register, and the value
+                    // is copied to the original destination right after it.
+                    Operand retReg = dest.Type.IsInteger()
+                        ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+                        : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+                    Operation copyOp = new Operation(Instruction.Copy, dest, retReg);
+
+                    node = nodes.AddAfter(node, copyOp);
+
+                    operation.Destination = retReg;
+                }
+            }
+
+            operation.SetSources(sources);
+
+            return node;
+        }
+
+        // Rewrites a call operation for the System V ABI: arguments are copied to the
+        // integer or vector argument registers while there are registers available, and
+        // spilled to the stack with "SpillArg" otherwise. V128 values are passed and
+        // returned as two 64-bits halves on integer registers.
+        // Every argument register written here is added to the sources of the call,
+        // so that the call is recorded as using those registers.
+        // Returns the last node inserted after the call, or the call node itself.
+        private static LLNode HandleCallSystemVAbi(LLNode node, Operation operation)
+        {
+            Operand dest = operation.Destination;
+
+            LinkedList<Node> nodes = node.List;
+
+            List<Operand> sources = new List<Operand>();
+
+            sources.Add(operation.GetSource(0));
+
+            int argsCount = operation.SourcesCount - 1;
+
+            int intMax = CallingConvention.GetIntArgumentsOnRegsCount();
+            int vecMax = CallingConvention.GetVecArgumentsOnRegsCount();
+
+            int intCount = 0;
+            int vecCount = 0;
+
+            int stackOffset = 0;
+
+            for (int index = 0; index < argsCount; index++)
+            {
+                Operand source = operation.GetSource(index + 1);
+
+                bool passOnReg;
+
+                if (source.Type.IsInteger())
+                {
+                    passOnReg = intCount < intMax;
+                }
+                else if (source.Type == OperandType.V128)
+                {
+                    // Both halves need an integer register, so two must be available.
+                    passOnReg = intCount + 1 < intMax;
+                }
+                else
+                {
+                    passOnReg = vecCount < vecMax;
+                }
+
+                if (source.Type == OperandType.V128 && passOnReg)
+                {
+                    // V128 is a struct, we pass each half on a GPR if possible.
+                    Operand argReg  = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+                    Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+                    nodes.AddBefore(node, new Operation(Instruction.VectorExtract, argReg,  source, Const(0)));
+                    nodes.AddBefore(node, new Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+                    // Both registers are inputs of the call, just like the registers
+                    // used by the other arguments, so they must be sources as well.
+                    sources.Add(argReg);
+                    sources.Add(argReg2);
+
+                    continue;
+                }
+
+                if (passOnReg)
+                {
+                    Operand argReg = source.Type.IsInteger()
+                        ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+                        : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+                    Operation copyOp = new Operation(Instruction.Copy, argReg, source);
+
+                    HandleConstantCopy(nodes.AddBefore(node, copyOp), copyOp);
+
+                    sources.Add(argReg);
+                }
+                else
+                {
+                    // No register left for this argument, it goes to the stack.
+                    // The offset advances by the size of each spilled argument.
+                    Operand offset = new Operand(stackOffset);
+
+                    Operation spillOp = new Operation(Instruction.SpillArg, null, offset, source);
+
+                    HandleConstantCopy(nodes.AddBefore(node, spillOp), spillOp);
+
+                    stackOffset += source.Type.GetSizeInBytes();
+                }
+            }
+
+            if (dest != null)
+            {
+                if (dest.Type == OperandType.V128)
+                {
+                    // The two halves of the result come on the low and high integer return
+                    // registers, and are combined into the destination after the call.
+                    // The call itself is left without a destination.
+                    Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(),     OperandType.I64);
+                    Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+                    node = nodes.AddAfter(node, new Operation(Instruction.VectorCreateScalar, dest, retLReg));
+                    node = nodes.AddAfter(node, new Operation(Instruction.VectorInsert,       dest, dest, retHReg, Const(1)));
+
+                    operation.Destination = null;
+                }
+                else
+                {
+                    // The call now writes the return register, and the value
+                    // is copied to the original destination right after it.
+                    Operand retReg = dest.Type.IsInteger()
+                        ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+                        : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+                    Operation copyOp = new Operation(Instruction.Copy, dest, retReg);
+
+                    node = nodes.AddAfter(node, copyOp);
+
+                    operation.Destination = retReg;
+                }
+            }
+
+            operation.SetSources(sources.ToArray());
+
+            return node;
+        }
+
+        // Replaces a LoadArgument operation with a copy (or a load, for V128) from a
+        // local that holds the value of the argument register. That local is created
+        // once per argument position, with a copy from the register inserted at the
+        // start of the entry block, and is cached on "preservedArgs" for later uses.
+        // Arguments that are not passed on registers are not handled yet; for those,
+        // the operation is left untouched.
+        private static void HandleLoadArgumentWindowsAbi(
+            CompilerContext cctx,
+            LLNode node,
+            Operand[] preservedArgs,
+            Operation operation)
+        {
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+            // When the function returns V128, the argument positions are shifted by one.
+            // NOTE(review): presumably position 0 then holds the address where the return
+            // value is stored, as HandleReturnWindowsAbi uses it that way - confirm.
+            int retArgs = cctx.FuncReturnType == OperandType.V128 ? 1 : 0;
+
+            int index = source.AsInt32() + retArgs;
+
+            if (index < CallingConvention.GetArgumentsOnRegsCount())
+            {
+                Operand dest = operation.Destination;
+
+                if (preservedArgs[index] == null)
+                {
+                    Operand argReg, pArg;
+
+                    if (dest.Type.IsInteger())
+                    {
+                        argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), dest.Type);
+
+                        pArg = Local(dest.Type);
+                    }
+                    else if (dest.Type == OperandType.V128)
+                    {
+                        // For V128, the I64 value on the integer register is preserved,
+                        // and it is used below as the address the vector is loaded from.
+                        argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), OperandType.I64);
+
+                        pArg = Local(OperandType.I64);
+                    }
+                    else
+                    {
+                        argReg = Xmm(CallingConvention.GetVecArgumentRegister(index), dest.Type);
+
+                        pArg = Local(dest.Type);
+                    }
+
+                    Operation copyOp = new Operation(Instruction.Copy, pArg, argReg);
+
+                    // The register is saved at the very start of the entry block.
+                    cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+                    preservedArgs[index] = pArg;
+                }
+
+                Operation argCopyOp = new Operation(dest.Type == OperandType.V128
+                    ? Instruction.Load
+                    : Instruction.Copy, dest, preservedArgs[index]);
+
+                node.List.AddBefore(node, argCopyOp);
+
+                Delete(node, operation);
+            }
+            else
+            {
+                // TODO: Pass on stack.
+            }
+        }
+
+        // Replaces a LoadArgument operation with a copy from a local that holds the value
+        // of the argument, for the System V ABI. That local is created once per argument,
+        // with the copies from the argument registers inserted at the start of the entry
+        // block, and is cached on "preservedArgs" for later uses.
+        // Arguments that are not passed on registers are not handled yet; for those,
+        // the operation is left untouched.
+        private static void HandleLoadArgumentSystemVAbi(
+            CompilerContext cctx,
+            LLNode node,
+            Operand[] preservedArgs,
+            Operation operation)
+        {
+            Operand source = operation.GetSource(0);
+
+            Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+            int index = source.AsInt32();
+
+            // Count how many integer and vector registers are used by the arguments that
+            // come before this one. A V128 argument uses two integer registers.
+            int intCount = 0;
+            int vecCount = 0;
+
+            for (int cIndex = 0; cIndex < index; cIndex++)
+            {
+                OperandType argType = cctx.FuncArgTypes[cIndex];
+
+                if (argType.IsInteger())
+                {
+                    intCount++;
+                }
+                else if (argType == OperandType.V128)
+                {
+                    intCount += 2;
+                }
+                else
+                {
+                    vecCount++;
+                }
+            }
+
+            Operand dest = operation.Destination;
+
+            // The register class is decided by the type of the loaded argument, which is
+            // the destination type. The source can't be used for this, as it is only the
+            // constant with the argument index, so its type is unrelated to the argument.
+            OperandType destType = dest.Type;
+
+            bool passOnReg;
+
+            if (destType.IsInteger())
+            {
+                passOnReg = intCount < CallingConvention.GetIntArgumentsOnRegsCount();
+            }
+            else if (destType == OperandType.V128)
+            {
+                // Both halves need an integer register, so two must be available.
+                passOnReg = intCount + 1 < CallingConvention.GetIntArgumentsOnRegsCount();
+            }
+            else
+            {
+                passOnReg = vecCount < CallingConvention.GetVecArgumentsOnRegsCount();
+            }
+
+            if (passOnReg)
+            {
+                if (preservedArgs[index] == null)
+                {
+                    if (destType == OperandType.V128)
+                    {
+                        // V128 is a struct, we pass each half on a GPR if possible.
+                        Operand pArg = Local(OperandType.V128);
+
+                        Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount),     OperandType.I64);
+                        Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);
+
+                        Operation copyL = new Operation(Instruction.VectorCreateScalar, pArg, argLReg);
+                        Operation copyH = new Operation(Instruction.VectorInsert,       pArg, pArg, argHReg, Const(1));
+
+                        // Both are added at the start of the entry block. The high half is
+                        // added first, so that the low half ends up before it on the list.
+                        cctx.Cfg.Entry.Operations.AddFirst(copyH);
+                        cctx.Cfg.Entry.Operations.AddFirst(copyL);
+
+                        preservedArgs[index] = pArg;
+                    }
+                    else
+                    {
+                        Operand pArg = Local(destType);
+
+                        Operand argReg = destType.IsInteger()
+                            ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), destType)
+                            : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), destType);
+
+                        Operation copyOp = new Operation(Instruction.Copy, pArg, argReg);
+
+                        cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+                        preservedArgs[index] = pArg;
+                    }
+                }
+
+                Operation argCopyOp = new Operation(Instruction.Copy, dest, preservedArgs[index]);
+
+                node.List.AddBefore(node, argCopyOp);
+
+                Delete(node, operation);
+            }
+            else
+            {
+                // TODO: Pass on stack.
+            }
+        }
+
+        // Moves the value returned by a Return operation to the place where it should be
+        // for the Windows ABI, and then removes all sources from the operation.
+        // Integer and FP values are copied to the return registers. A V128 value is stored
+        // to the address that was received on the first integer argument register, which
+        // is saved at the start of the entry block and cached on preservedArgs[0].
+        private static void HandleReturnWindowsAbi(
+            CompilerContext cctx,
+            LLNode node,
+            Operand[] preservedArgs,
+            Operation operation)
+        {
+            // Nothing is returned, so there is nothing to move.
+            if (operation.SourcesCount == 0)
+            {
+                return;
+            }
+
+            Operand value = operation.GetSource(0);
+
+            bool isVector = value.Type == OperandType.V128;
+
+            Operand retLocation;
+
+            if (value.Type.IsInteger())
+            {
+                retLocation = Gpr(CallingConvention.GetIntReturnRegister(), value.Type);
+            }
+            else if (!isVector)
+            {
+                retLocation = Xmm(CallingConvention.GetVecReturnRegister(), value.Type);
+            }
+            else
+            {
+                if (preservedArgs[0] == null)
+                {
+                    // First use of the address, save the register on a local.
+                    Operand savedAddress = Local(OperandType.I64);
+
+                    Operand firstArgReg = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
+
+                    cctx.Cfg.Entry.Operations.AddFirst(new Operation(Instruction.Copy, savedAddress, firstArgReg));
+
+                    preservedArgs[0] = savedAddress;
+                }
+
+                retLocation = preservedArgs[0];
+            }
+
+            // Vectors are written to memory, everything else goes on a register.
+            Operation retOp = isVector
+                ? new Operation(Instruction.Store, null, retLocation, value)
+                : new Operation(Instruction.Copy, retLocation, value);
+
+            node.List.AddBefore(node, retOp);
+
+            operation.SetSources(new Operand[0]);
+        }
+
+        // Moves the value returned by a Return operation to the return registers used
+        // by the System V ABI. A V128 value is split in two 64-bits halves, that go on
+        // the low and high integer return registers.
+        private static void HandleReturnSystemVAbi(LLNode node, Operation operation)
+        {
+            // Nothing is returned, so there is nothing to move.
+            if (operation.SourcesCount == 0)
+            {
+                return;
+            }
+
+            Operand value = operation.GetSource(0);
+
+            if (value.Type != OperandType.V128)
+            {
+                Operand retReg;
+
+                if (value.Type.IsInteger())
+                {
+                    retReg = Gpr(CallingConvention.GetIntReturnRegister(), value.Type);
+                }
+                else
+                {
+                    retReg = Xmm(CallingConvention.GetVecReturnRegister(), value.Type);
+                }
+
+                node.List.AddBefore(node, new Operation(Instruction.Copy, retReg, value));
+
+                return;
+            }
+
+            Operand lowReg  = Gpr(CallingConvention.GetIntReturnRegister(),     OperandType.I64);
+            Operand highReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+            Operation extractLow  = new Operation(Instruction.VectorExtract, lowReg,  value, Const(0));
+            Operation extractHigh = new Operation(Instruction.VectorExtract, highReg, value, Const(1));
+
+            node.List.AddBefore(node, extractLow);
+            node.List.AddBefore(node, extractHigh);
+        }
+
+        // Materializes "source" on a new local of the same type, going through an integer:
+        // the integer constant from GetIntConst is copied to a local, which is then turned
+        // into the result with VectorCreateScalar. Both operations are inserted before "node".
+        private static Operand AddXmmCopy(LLNode node, Operand source)
+        {
+            Operand result = Local(source.Type);
+
+            Operand bits = GetIntConst(source);
+
+            Operand bitsCopy = AddCopy(node, bits);
+
+            node.List.AddBefore(node, new Operation(Instruction.VectorCreateScalar, result, bitsCopy));
+
+            return result;
+        }
+
+        // Inserts, before "node", a copy of "source" to a new local of the same type,
+        // and returns that local.
+        private static Operand AddCopy(LLNode node, Operand source)
+        {
+            Operand copy = Local(source.Type);
+
+            node.List.AddBefore(node, new Operation(Instruction.Copy, copy, source));
+
+            return copy;
+        }
+
+        // Returns an integer constant made from the AsInt32 (FP32) or AsInt64 (FP64)
+        // value of a FP operand. Operands of any other type are returned unchanged.
+        private static Operand GetIntConst(Operand value)
+        {
+            switch (value.Type)
+            {
+                case OperandType.FP32:
+                    return Const(value.AsInt32());
+
+                case OperandType.FP64:
+                    return Const(value.AsInt64());
+
+                default:
+                    return value;
+            }
+        }
+
+        // Returns true when the value of the operand does not fit on a signed 32-bits
+        // integer. The value is read as 32-bits for I32 operands, and as 64-bits otherwise.
+        private static bool IsLongConst(Operand operand)
+        {
+            long value;
+
+            if (operand.Type == OperandType.I32)
+            {
+                value = operand.AsInt32();
+            }
+            else
+            {
+                value = operand.AsInt64();
+            }
+
+            return !ConstFitsOnS32(value);
+        }
+
+        // Returns true when "value" is inside the range of a signed 32-bits integer.
+        private static bool ConstFitsOnS32(long value)
+        {
+            return value >= int.MinValue && value <= int.MaxValue;
+        }
+
+        // Clears the destination and every source of the operation,
+        // and then removes the node from the list it belongs to.
+        private static void Delete(LLNode node, Operation operation)
+        {
+            operation.Destination = null;
+
+            int srcIndex = 0;
+
+            while (srcIndex < operation.SourcesCount)
+            {
+                operation.SetSource(srcIndex, null);
+
+                srcIndex++;
+            }
+
+            LinkedList<Node> owner = node.List;
+
+            owner.Remove(node);
+        }
+
+        // Creates a register operand of the integer register type, with the given type.
+        private static Operand Gpr(X86Register register, OperandType type)
+        {
+            int registerIndex = (int)register;
+
+            return Register(registerIndex, RegisterType.Integer, type);
+        }
+
+        // Creates a register operand of the vector register type, with the given type.
+        private static Operand Xmm(X86Register register, OperandType type)
+        {
+            int registerIndex = (int)register;
+
+            return Register(registerIndex, RegisterType.Vector, type);
+        }
+
+        // Reports whether destination and first source are treated as the same operand for
+        // this operation; instructions not listed are left to IsVexSameOperandDestSrc1.
+        private static bool IsSameOperandDestSrc1(Operation operation)
+        {
+            switch (operation.Instruction)
+            {
+                case Instruction.BitwiseAnd:
+                case Instruction.BitwiseExclusiveOr:
+                case Instruction.BitwiseNot:
+                case Instruction.BitwiseOr:
+                case Instruction.ByteSwap:
+                case Instruction.Negate:
+                case Instruction.RotateRight:
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+                    return true;
+                // Always for integer destinations; for the other types only without VEX.
+                case Instruction.Add:
+                case Instruction.Multiply:
+                case Instruction.Subtract:
+                    return !HardwareCapabilities.SupportsVexEncoding || operation.Destination.Type.IsInteger();
+                case Instruction.Divide:
+                    return !(HardwareCapabilities.SupportsVexEncoding || operation.Destination.Type.IsInteger());
+                case Instruction.VectorInsert:
+                case Instruction.VectorInsert16:
+                case Instruction.VectorInsert8:
+                    return !HardwareCapabilities.SupportsVexEncoding;
+                default:
+                    return IsVexSameOperandDestSrc1(operation);
+            }
+        }
+
+        // Only intrinsic (Extended) operations qualify: true when VEX encoding is not
+        // supported, the operation has two or more sources and its destination is V128.
+        private static bool IsVexSameOperandDestSrc1(Operation operation)
+        {
+            if (!IsIntrinsic(operation.Instruction))
+            {
+                return false;
+            }
+
+            bool hasTwoOrMoreSources = operation.SourcesCount >= 2;
+            bool writesVector        = operation.Destination != null && operation.Destination.Type == OperandType.V128;
+            return !HardwareCapabilities.SupportsVexEncoding && hasTwoOrMoreSources && writesVector;
+        }
+
+        // True for Copy, LoadArgument, Spill and SpillArg (per the name: a constant is allowed as source 1).
+        private static bool HasConstSrc1(Instruction inst)
+        {
+            switch (inst)
+            {
+                case Instruction.SpillArg:
+                case Instruction.Spill:
+                case Instruction.LoadArgument:
+                case Instruction.Copy:
+                    return true;
+                default: return false;
+            }
+        }
+
+        // True for the instructions listed below (per the name: a constant is allowed as source 2).
+        private static bool HasConstSrc2(Instruction inst)
+        {
+            switch (inst)
+            {
+                case Instruction.Add:
+                case Instruction.Subtract:
+                case Instruction.Multiply:
+                case Instruction.BitwiseAnd:
+                case Instruction.BitwiseOr:
+                case Instruction.BitwiseExclusiveOr:
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightSI:
+                case Instruction.ShiftRightUI:
+                case Instruction.RotateRight:
+                case Instruction.CompareEqual:
+                case Instruction.CompareNotEqual:
+                case Instruction.CompareGreater:
+                case Instruction.CompareGreaterOrEqual:
+                case Instruction.CompareGreaterOrEqualUI:
+                case Instruction.CompareGreaterUI:
+                case Instruction.CompareLess:
+                case Instruction.CompareLessOrEqual:
+                case Instruction.CompareLessOrEqualUI:
+                case Instruction.CompareLessUI:
+                case Instruction.VectorExtract:
+                case Instruction.VectorExtract16:
+                case Instruction.VectorExtract8:
+                    return true;
+                default: return false;
+            }
+        }
+
+        // True for the instructions whose two sources may be swapped: Add, Multiply,
+        // the bitwise And/Or/Exclusive-Or and the equal/not-equal comparisons.
+        private static bool IsCommutative(Instruction inst)
+        {
+            bool isArithmetic = inst == Instruction.Add ||
+                                inst == Instruction.Multiply;
+
+            bool isBitwise = inst == Instruction.BitwiseAnd         ||
+                             inst == Instruction.BitwiseExclusiveOr ||
+                             inst == Instruction.BitwiseOr;
+
+            bool isEquality = inst == Instruction.CompareEqual ||
+                              inst == Instruction.CompareNotEqual;
+
+            return isArithmetic || isBitwise || isEquality;
+        }
+
+        // An instruction counts as an intrinsic exactly when it is
+        // Instruction.Extended.
+        private static bool IsIntrinsic(Instruction inst) =>
+            Instruction.Extended == inst;
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/X86Condition.cs b/ARMeilleure/CodeGen/X86/X86Condition.cs
new file mode 100644
index 000000000..a17c6d6c5
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/X86Condition.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.CodeGen.X86
+{
+    enum X86Condition // Members come in opposite pairs that differ only in bit 0.
+    {
+        Overflow       = 0x0, // NOTE(review): values presumably mirror the x86 condition-code encoding - confirm.
+        NotOverflow    = 0x1,
+        Below          = 0x2,
+        AboveOrEqual   = 0x3,
+        Equal          = 0x4,
+        NotEqual       = 0x5,
+        BelowOrEqual   = 0x6,
+        Above          = 0x7,
+        Sign           = 0x8,
+        NotSign        = 0x9,
+        ParityEven     = 0xa,
+        ParityOdd      = 0xb,
+        Less           = 0xc,
+        GreaterOrEqual = 0xd,
+        LessOrEqual    = 0xe,
+        Greater        = 0xf
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs
new file mode 100644
index 000000000..10ba891aa
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs
@@ -0,0 +1,190 @@
+namespace ARMeilleure.CodeGen.X86
+{
+    enum X86Instruction // Values are implicit (0, 1, 2, ...), so the member order defines them.
+    {
+        Add,
+        Addpd,
+        Addps,
+        Addsd,
+        Addss,
+        And,
+        Andnpd,
+        Andnps,
+        Bsr,
+        Bswap,
+        Call,
+        Cmovcc,
+        Cmp,
+        Cmppd,
+        Cmpps,
+        Cmpsd,
+        Cmpss,
+        Cmpxchg16b,
+        Comisd,
+        Comiss,
+        Cpuid,
+        Cvtdq2pd,
+        Cvtdq2ps,
+        Cvtpd2dq,
+        Cvtpd2ps,
+        Cvtps2dq,
+        Cvtps2pd,
+        Cvtsd2si,
+        Cvtsd2ss,
+        Cvtsi2sd,
+        Cvtsi2ss,
+        Cvtss2sd,
+        Div,
+        Divpd,
+        Divps,
+        Divsd,
+        Divss,
+        Haddpd,
+        Haddps,
+        Idiv,
+        Imul,
+        Imul128,
+        Insertps,
+        Lea,
+        Maxpd,
+        Maxps,
+        Maxsd,
+        Maxss,
+        Minpd,
+        Minps,
+        Minsd,
+        Minss,
+        Mov,
+        Mov16,
+        Mov8,
+        Movd,
+        Movdqu,
+        Movhlps,
+        Movlhps,
+        Movq,
+        Movsd,
+        Movss,
+        Movsx16,
+        Movsx32,
+        Movsx8,
+        Movzx16,
+        Movzx8,
+        Mul128,
+        Mulpd,
+        Mulps,
+        Mulsd,
+        Mulss,
+        Neg,
+        Not,
+        Or,
+        Paddb,
+        Paddd,
+        Paddq,
+        Paddw,
+        Pand,
+        Pandn,
+        Pavgb,
+        Pavgw,
+        Pblendvb,
+        Pcmpeqb,
+        Pcmpeqd,
+        Pcmpeqq,
+        Pcmpeqw,
+        Pcmpgtb,
+        Pcmpgtd,
+        Pcmpgtq,
+        Pcmpgtw,
+        Pextrb,
+        Pextrd,
+        Pextrq,
+        Pextrw,
+        Pinsrb,
+        Pinsrd,
+        Pinsrq,
+        Pinsrw,
+        Pmaxsb,
+        Pmaxsd,
+        Pmaxsw,
+        Pmaxub,
+        Pmaxud,
+        Pmaxuw,
+        Pminsb,
+        Pminsd,
+        Pminsw,
+        Pminub,
+        Pminud,
+        Pminuw,
+        Pmovsxbw,
+        Pmovsxdq,
+        Pmovsxwd,
+        Pmovzxbw,
+        Pmovzxdq,
+        Pmovzxwd,
+        Pmulld,
+        Pmullw,
+        Pop,
+        Popcnt,
+        Por,
+        Pshufb,
+        Pshufd,
+        Pslld,
+        Pslldq,
+        Psllq,
+        Psllw,
+        Psrad,
+        Psraw,
+        Psrld,
+        Psrlq,
+        Psrldq,
+        Psrlw,
+        Psubb,
+        Psubd,
+        Psubq,
+        Psubw,
+        Punpckhbw,
+        Punpckhdq,
+        Punpckhqdq,
+        Punpckhwd,
+        Punpcklbw,
+        Punpckldq,
+        Punpcklqdq,
+        Punpcklwd,
+        Push,
+        Pxor,
+        Rcpps,
+        Rcpss,
+        Ror,
+        Roundpd,
+        Roundps,
+        Roundsd,
+        Roundss,
+        Rsqrtps,
+        Rsqrtss,
+        Sar,
+        Setcc,
+        Shl,
+        Shr,
+        Shufpd,
+        Shufps,
+        Sqrtpd,
+        Sqrtps,
+        Sqrtsd,
+        Sqrtss,
+        Sub,
+        Subpd,
+        Subps,
+        Subsd,
+        Subss,
+        Test,
+        Unpckhpd,
+        Unpckhps,
+        Unpcklpd,
+        Unpcklps,
+        Vpblendvb,
+        Xor,
+        Xorpd,
+        Xorps,
+
+        Count // Equals the number of members declared above it; it only does so while it stays last.
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/CodeGen/X86/X86Register.cs b/ARMeilleure/CodeGen/X86/X86Register.cs
new file mode 100644
index 000000000..01f63e311
--- /dev/null
+++ b/ARMeilleure/CodeGen/X86/X86Register.cs
@@ -0,0 +1,41 @@
+namespace ARMeilleure.CodeGen.X86
+{
+    enum X86Register
+    {
+        Invalid = -1, // Lies outside the 0-15 range used by every register below.
+
+        Rax = 0, // Rax..R15 take the values 0-15.
+        Rcx = 1,
+        Rdx = 2,
+        Rbx = 3,
+        Rsp = 4,
+        Rbp = 5,
+        Rsi = 6,
+        Rdi = 7,
+        R8  = 8,
+        R9  = 9,
+        R10 = 10,
+        R11 = 11,
+        R12 = 12,
+        R13 = 13,
+        R14 = 14,
+        R15 = 15,
+
+        Xmm0  = 0, // Xmm0..Xmm15 reuse the values 0-15, so the value alone does not tell the two groups apart.
+        Xmm1  = 1,
+        Xmm2  = 2,
+        Xmm3  = 3,
+        Xmm4  = 4,
+        Xmm5  = 5,
+        Xmm6  = 6,
+        Xmm7  = 7,
+        Xmm8  = 8,
+        Xmm9  = 9,
+        Xmm10 = 10,
+        Xmm11 = 11,
+        Xmm12 = 12,
+        Xmm13 = 13,
+        Xmm14 = 14,
+        Xmm15 = 15
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Common/BitMap.cs b/ARMeilleure/Common/BitMap.cs
new file mode 100644
index 000000000..9dff271b4
--- /dev/null
+++ b/ARMeilleure/Common/BitMap.cs
@@ -0,0 +1,138 @@
+using System.Collections;
+using System.Collections.Generic;
+
+namespace ARMeilleure.Common
+{
+    // Growable set of bit indices, stored as a list of 32-bit words. Words are
+    // appended on demand, so two maps may hold a different number of words.
+    class BitMap : IEnumerable<int>
+    {
+        private const int IntSize = 32;
+        private const int IntMask = IntSize - 1;
+
+        private readonly List<int> _masks;
+
+        // Creates a map holding enough zeroed words for initialCapacity bits.
+        public BitMap(int initialCapacity)
+        {
+            _masks = new List<int>((initialCapacity + IntMask) / IntSize);
+
+            EnsureCapacity(initialCapacity);
+        }
+
+        // Sets one bit, growing the map if needed. Returns false if it was already set.
+        public bool Set(int bit)
+        {
+            EnsureCapacity(bit + 1);
+
+            int wordIndex = bit / IntSize;
+            int wordMask  = 1 << (bit & IntMask);
+
+            if ((_masks[wordIndex] & wordMask) != 0)
+            {
+                return false;
+            }
+
+            _masks[wordIndex] |= wordMask;
+
+            return true;
+        }
+
+        // Clears one bit. The map is still grown to cover the bit first.
+        public void Clear(int bit)
+        {
+            EnsureCapacity(bit + 1);
+
+            _masks[bit / IntSize] &= ~(1 << (bit & IntMask));
+        }
+
+        // Tells whether one bit is set. The map is still grown to cover the bit first.
+        public bool IsSet(int bit)
+        {
+            EnsureCapacity(bit + 1);
+
+            return (_masks[bit / IntSize] & (1 << (bit & IntMask))) != 0;
+        }
+
+        // Union: sets every bit that is set in map. Returns true if this map changed.
+        // The maps may differ in size; this map grows when map is the larger one.
+        public bool Set(BitMap map)
+        {
+            EnsureCapacity(map._masks.Count * IntSize);
+
+            bool modified = false;
+
+            // Walk only the words map has: this map may hold more words than map does,
+            // and indexing map past its last word would throw.
+            for (int index = 0; index < map._masks.Count; index++)
+            {
+                int newValue = _masks[index] | map._masks[index];
+
+                if (_masks[index] != newValue)
+                {
+                    _masks[index] = newValue;
+
+                    modified = true;
+                }
+            }
+
+            return modified;
+        }
+
+        // Difference: clears every bit that is set in map. Returns true if this map changed.
+        public bool Clear(BitMap map)
+        {
+            EnsureCapacity(map._masks.Count * IntSize);
+
+            bool modified = false;
+
+            // Words beyond map's last word have nothing to clear, so they are skipped.
+            for (int index = 0; index < map._masks.Count; index++)
+            {
+                int newValue = _masks[index] & ~map._masks[index];
+
+                if (_masks[index] != newValue)
+                {
+                    _masks[index] = newValue;
+
+                    modified = true;
+                }
+            }
+
+            return modified;
+        }
+
+        // Appends zeroed words until the map can hold at least size bits.
+        private void EnsureCapacity(int size)
+        {
+            while (_masks.Count * IntSize < size)
+            {
+                _masks.Add(0);
+            }
+        }
+
+        // Enumerates the indices of the set bits in ascending order.
+        public IEnumerator<int> GetEnumerator()
+        {
+            for (int index = 0; index < _masks.Count; index++)
+            {
+                int mask = _masks[index];
+
+                while (mask != 0)
+                {
+                    int bit = BitUtils.LowestBitSet(mask);
+
+                    mask &= ~(1 << bit);
+
+                    yield return index * IntSize + bit;
+                }
+            }
+        }
+
+        // Non-generic enumeration; forwards to the typed enumerator.
+        IEnumerator IEnumerable.GetEnumerator()
+        {
+            return GetEnumerator();
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Common/BitUtils.cs b/ARMeilleure/Common/BitUtils.cs
new file mode 100644
index 000000000..55344608c
--- /dev/null
+++ b/ARMeilleure/Common/BitUtils.cs
@@ -0,0 +1,109 @@
+using System.Runtime.CompilerServices;
+
+namespace ARMeilleure.Common
+{
+    // Bit manipulation helpers: bit scans, population count, masks and rotations.
+    static class BitUtils
+    {
+        private const int DeBruijnSequence = 0x77cb531;
+
+        // Maps the top 5 bits of (DeBruijnSequence << n) back to n, for n in 0..31.
+        private static readonly int[] DeBruijnLbsLut;
+
+        static BitUtils()
+        {
+            DeBruijnLbsLut = new int[32];
+
+            for (int index = 0; index < DeBruijnLbsLut.Length; index++)
+            {
+                uint lutIndex = (uint)(DeBruijnSequence * (1 << index)) >> 27;
+
+                DeBruijnLbsLut[lutIndex] = index;
+            }
+        }
+
+        // Index of the least significant set bit of value, or -1 when value is zero.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int LowestBitSet(int value)
+        {
+            if (value == 0)
+            {
+                return -1;
+            }
+
+            int lsb = value & -value;
+
+            return DeBruijnLbsLut[(uint)(DeBruijnSequence * lsb) >> 27];
+        }
+
+        // Index of the most significant set bit of value, or -1 when value is zero.
+        public static int HighestBitSet(int value)
+        {
+            if (value == 0)
+            {
+                return -1;
+            }
+
+            for (int bit = 31; bit >= 0; bit--)
+            {
+                if (((value >> bit) & 1) != 0)
+                {
+                    return bit;
+                }
+            }
+
+            return -1;
+        }
+
+        private static readonly sbyte[] HbsNibbleLut = { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
+
+        // Highest set bit of the low 4 bits of value, or -1 when they are all zero.
+        public static int HighestBitSetNibble(int value) => HbsNibbleLut[value & 0b1111];
+
+        // ORs copies of bits shifted left by 0, size, 2 * size, ... below 64; size must be positive.
+        public static long Replicate(long bits, int size)
+        {
+            if (size <= 0)
+            {
+                // The shift amount advances by size, so the loop below would never finish.
+                throw new System.ArgumentOutOfRangeException(nameof(size));
+            }
+
+            long output = 0;
+
+            for (int bit = 0; bit < 64; bit += size)
+            {
+                output |= bits << bit;
+            }
+
+            return output;
+        }
+
+        // Number of set bits in value.
+        public static int CountBits(int value)
+        {
+            int count = 0;
+
+            while (value != 0)
+            {
+                value &= ~(value & -value);
+
+                count++;
+            }
+
+            return count;
+        }
+
+        // Mask with the low "bits" bits set; 64 yields all ones.
+        public static long FillWithOnes(int bits) => bits == 64 ? -1L : (1L << bits) - 1;
+
+        // Rotations do not mask the result: bits above "size" may be set when size is narrower than the type.
+        public static int RotateRight(int bits, int shift, int size) => (int)RotateRight((uint)bits, shift, size);
+
+        public static uint RotateRight(uint bits, int shift, int size) => (bits >> shift) | (bits << (size - shift));
+
+        public static long RotateRight(long bits, int shift, int size) => (long)RotateRight((ulong)bits, shift, size);
+
+        public static ulong RotateRight(ulong bits, int shift, int size) => (bits >> shift) | (bits << (size - shift));
+    }
+}
diff --git a/ARMeilleure/Common/EnumUtils.cs b/ARMeilleure/Common/EnumUtils.cs
new file mode 100644
index 000000000..2a4aa645b
--- /dev/null
+++ b/ARMeilleure/Common/EnumUtils.cs
@@ -0,0 +1,12 @@
+using System;
+
+namespace ARMeilleure.Common
+{
+    static class EnumUtils
+    {
+        // Returns how many named constants enumType declares; names sharing a value
+        // are each counted, since the count comes from Enum.GetNames.
+        public static int GetCount(Type enumType) =>
+            Enum.GetNames(enumType).Length;
+    }
+}
diff --git a/ARMeilleure/Decoders/Block.cs b/ARMeilleure/Decoders/Block.cs
new file mode 100644
index 000000000..3d13c2d5e
--- /dev/null
+++ b/ARMeilleure/Decoders/Block.cs
@@ -0,0 +1,99 @@
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.Decoders
+{
+    // Sequence of decoded opcodes spanning [Address, EndAddress), plus successor links.
+    class Block
+    {
+        public ulong Address    { get; set; }
+        public ulong EndAddress { get; set; }
+
+        public Block Next   { get; set; }
+        public Block Branch { get; set; }
+
+        public List<OpCode> OpCodes { get; private set; }
+
+        public Block()
+        {
+            OpCodes = new List<OpCode>();
+        }
+
+        public Block(ulong address) : this()
+        {
+            Address = address;
+        }
+
+        // Moves the opcodes located at or after rightBlock.Address into rightBlock, which
+        // also takes over EndAddress, Next and Branch. This block is cut so that it ends
+        // where rightBlock starts and falls through to it (its Branch is cleared).
+        // Throws ArgumentException when no opcode would move (e.g. this block is empty).
+        public void Split(Block rightBlock)
+        {
+            int splitIndex = BinarySearch(OpCodes, rightBlock.Address);
+
+            // The search may stop on the opcode just before the split point. The bounds
+            // check keeps an empty opcode list from faulting on the indexer.
+            if (splitIndex < OpCodes.Count && (ulong)OpCodes[splitIndex].Address < rightBlock.Address)
+            {
+                splitIndex++;
+            }
+
+            int splitCount = OpCodes.Count - splitIndex;
+
+            if (splitCount <= 0)
+            {
+                throw new ArgumentException("Can't split at right block address.");
+            }
+
+            rightBlock.EndAddress = EndAddress;
+
+            rightBlock.Next   = Next;
+            rightBlock.Branch = Branch;
+
+            rightBlock.OpCodes.AddRange(OpCodes.GetRange(splitIndex, splitCount));
+
+            EndAddress = rightBlock.Address;
+
+            Next   = rightBlock;
+            Branch = null;
+
+            OpCodes.RemoveRange(splitIndex, splitCount);
+        }
+
+        // Binary search that assumes opCodes is sorted by address. Returns the index of the
+        // opcode at "address" or, failing that, of the last opcode probed (0 when empty).
+        private static int BinarySearch(List<OpCode> opCodes, ulong address)
+        {
+            int left   = 0;
+            int middle = 0;
+            int right  = opCodes.Count - 1;
+
+            while (left <= right)
+            {
+                middle = left + ((right - left) >> 1);
+
+                ulong current = (ulong)opCodes[middle].Address;
+
+                if (address == current)
+                {
+                    break;
+                }
+
+                if (address < current)
+                {
+                    right = middle - 1;
+                }
+                else
+                {
+                    left = middle + 1;
+                }
+            }
+
+            return middle;
+        }
+
+        // Last opcode of the block, or null when the block holds no opcodes.
+        public OpCode GetLastOp() => OpCodes.Count > 0 ? OpCodes[OpCodes.Count - 1] : null;
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/Condition.cs b/ARMeilleure/Decoders/Condition.cs
new file mode 100644
index 000000000..727f897da
--- /dev/null
+++ b/ARMeilleure/Decoders/Condition.cs
@@ -0,0 +1,32 @@
+namespace ARMeilleure.Decoders
+{
+    // Condition codes; each even value is immediately followed by its opposite.
+    enum Condition
+    {
+        Eq   = 0,
+        Ne   = 1,
+        GeUn = 2,
+        LtUn = 3,
+        Mi   = 4,
+        Pl   = 5,
+        Vs   = 6,
+        Vc   = 7,
+        GtUn = 8,
+        LeUn = 9,
+        Ge   = 10,
+        Lt   = 11,
+        Gt   = 12,
+        Le   = 13,
+        Al   = 14,
+        Nv   = 15
+    }
+
+    static class ConditionExtensions
+    {
+        private const int NegationBit = 1;
+
+        // Conditions come in complementary pairs that differ only in bit 0, so
+        // toggling that bit gives the opposite condition (Al and Nv map to each other).
+        public static Condition Invert(this Condition cond) => (Condition)((int)cond ^ NegationBit);
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/DataOp.cs b/ARMeilleure/Decoders/DataOp.cs
new file mode 100644
index 000000000..464d00898
--- /dev/null
+++ b/ARMeilleure/Decoders/DataOp.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+    enum DataOp // NOTE(review): apparently a category of data-processing opcode; its users are outside this view - confirm.
+    {
+        Adr        = 0,
+        Arithmetic = 1,
+        Logical    = 2,
+        BitField   = 3
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/Decoder.cs b/ARMeilleure/Decoders/Decoder.cs
new file mode 100644
index 000000000..2311e9e96
--- /dev/null
+++ b/ARMeilleure/Decoders/Decoder.cs
@@ -0,0 +1,351 @@
+using ARMeilleure.Instructions;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Reflection.Emit;
+
+namespace ARMeilleure.Decoders
+{
+    static class Decoder
+    {
+        private delegate object MakeOp(InstDescriptor inst, ulong address, int opCode);
+        // Constructor delegates built by CacheOpActivator, keyed by opcode class type.
+        private static ConcurrentDictionary<Type, MakeOp> _opActivators;
+
+        static Decoder()
+        {
+            _opActivators = new ConcurrentDictionary<Type, MakeOp>();
+        }
+
+        public static Block[] DecodeBasicBlock(MemoryManager memory, ulong address, ExecutionMode mode)
+        {
+            Block block = new Block(address);
+            // No limit address: FillBlock stops after the first branch or exception opcode.
+            FillBlock(memory, mode, block, ulong.MaxValue);
+
+            return new Block[] { block };
+        }
+
+        public static Block[] DecodeFunction(MemoryManager memory, ulong address, ExecutionMode mode)
+        {
+            List<Block> blocks = new List<Block>();
+            // Blocks created by GetBlock that still have to be decoded.
+            Queue<Block> workQueue = new Queue<Block>();
+
+            Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>();
+            // Returns the block starting at blkAddress, creating and queueing it on first use.
+            Block GetBlock(ulong blkAddress)
+            {
+                if (!visited.TryGetValue(blkAddress, out Block block))
+                {
+                    block = new Block(blkAddress);
+
+                    workQueue.Enqueue(block);
+
+                    visited.Add(blkAddress, block);
+                }
+
+                return block;
+            }
+
+            GetBlock(address);
+
+            while (workQueue.TryDequeue(out Block currBlock))
+            {
+                // Check if the current block is inside another block.
+                if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
+                {
+                    Block nBlock = blocks[nBlkIndex];
+
+                    if (nBlock.Address == currBlock.Address)
+                    {
+                        throw new InvalidOperationException("Found duplicate block address on the list.");
+                    }
+
+                    nBlock.Split(currBlock);
+
+                    blocks.Insert(nBlkIndex + 1, currBlock);
+
+                    continue;
+                }
+
+                // If we have a block after the current one, set the limit address.
+                ulong limitAddress = ulong.MaxValue;
+
+                if (nBlkIndex != blocks.Count)
+                {
+                    Block nBlock = blocks[nBlkIndex];
+
+                    int nextIndex = nBlkIndex + 1;
+
+                    if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
+                    {
+                        limitAddress = blocks[nextIndex].Address;
+                    }
+                    else if (nBlock.Address > currBlock.Address)
+                    {
+                        limitAddress = blocks[nBlkIndex].Address;
+                    }
+                }
+
+                FillBlock(memory, mode, currBlock, limitAddress);
+
+                if (currBlock.OpCodes.Count != 0)
+                {
+                    // Set child blocks. "Branch" is the block the branch instruction
+                    // points to (when taken), "Next" is the block at the next address,
+                    // executed when the branch is not taken. Next stays null after an
+                    // unconditional branch (the BL/BLR exception is commented out below).
+                    OpCode lastOp = currBlock.GetLastOp();
+
+                    bool isCall = IsCall(lastOp);
+
+                    if (lastOp is IOpCodeBImm op && !isCall)
+                    {
+                        currBlock.Branch = GetBlock((ulong)op.Immediate);
+                    }
+
+                    if (!IsUnconditionalBranch(lastOp) /*|| isCall*/)
+                    {
+                        currBlock.Next = GetBlock(currBlock.EndAddress);
+                    }
+                }
+
+                // Insert the new block on the list (sorted by address).
+                if (blocks.Count != 0)
+                {
+                    Block nBlock = blocks[nBlkIndex];
+
+                    blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
+                }
+                else
+                {
+                    blocks.Add(currBlock);
+                }
+            }
+
+            return blocks.ToArray();
+        }
+
+        private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
+        {
+            index = 0;
+
+            int left  = 0;
+            int right = blocks.Count - 1;
+
+            while (left <= right)
+            {
+                int size = right - left;
+
+                int middle = left + (size >> 1);
+
+                Block block = blocks[middle];
+                // Remember the last probed position; on a miss the caller uses it as an insertion hint.
+                index = middle;
+
+                if (address >= block.Address && address < block.EndAddress)
+                {
+                    return true;
+                }
+
+                if (address < block.Address)
+                {
+                    right = middle - 1;
+                }
+                else
+                {
+                    left = middle + 1;
+                }
+            }
+
+            return false;
+        }
+
+        private static void FillBlock(
+            MemoryManager memory,
+            ExecutionMode mode,
+            Block         block,
+            ulong         limitAddress)
+        {
+            ulong address = block.Address;
+            // Decode until limitAddress is reached or a branch/exception opcode has been added.
+            OpCode opCode;
+
+            do
+            {
+                if (address >= limitAddress)
+                {
+                    break;
+                }
+
+                opCode = DecodeOpCode(memory, address, mode);
+
+                block.OpCodes.Add(opCode);
+
+                address += (ulong)opCode.OpCodeSizeInBytes;
+            }
+            while (!(IsBranch(opCode) || IsException(opCode)));
+            // EndAddress is exclusive: it is the address right after the last decoded opcode.
+            block.EndAddress = address;
+        }
+
+        private static bool IsBranch(OpCode opCode)
+        {
+            return opCode is OpCodeBImm ||
+                   opCode is OpCodeBReg || IsAarch32Branch(opCode);
+        }
+
+        private static bool IsUnconditionalBranch(OpCode opCode)
+        {
+            return opCode is OpCodeBImmAl ||
+                   opCode is OpCodeBReg   || IsAarch32UnconditionalBranch(opCode);
+        }
+
+        private static bool IsAarch32UnconditionalBranch(OpCode opCode)
+        {
+            if (!(opCode is OpCode32 op))
+            {
+                return false;
+            }
+
+            // Note: On ARM32, most instructions have conditional execution,
+            // so there's no "Always" (unconditional) branch like on ARM64.
+            // We need to check if the condition is "Always" instead.
+            return IsAarch32Branch(op) && op.Cond >= Condition.Al;
+        }
+
+        private static bool IsAarch32Branch(OpCode opCode)
+        {
+            // Note: On ARM32, most ALU operations can write to R15 (PC),
+            // so we must consider such operations as potential branches as well.
+            if (opCode is IOpCode32Alu opAlu && opAlu.Rd == RegisterAlias.Aarch32Pc)
+            {
+                return true;
+            }
+
+            // Same thing for memory operations. We have the cases where PC is a target
+            // register (Rt == 15 or (mask & (1 << 15)) != 0), and cases where there is
+            // a write back to PC (wback == true && Rn == 15), however the latter may
+            // be "undefined" depending on the CPU, so compilers should not produce that.
+            if (opCode is IOpCode32Mem || opCode is IOpCode32MemMult)
+            {
+                int rt, rn;
+
+                bool wBack, isLoad;
+
+                if (opCode is IOpCode32Mem opMem)
+                {
+                    rt     = opMem.Rt;
+                    rn     = opMem.Rn;
+                    wBack  = opMem.WBack;
+                    isLoad = opMem.IsLoad;
+
+                    // For the dual load, we also need to take into account the
+                    // case where Rt2 == 15 (PC).
+                    if (rt == 14 && opMem.Instruction.Name == InstName.Ldrd)
+                    {
+                        rt = RegisterAlias.Aarch32Pc;
+                    }
+                }
+                else if (opCode is IOpCode32MemMult opMemMult)
+                {
+                    const int pcMask = 1 << RegisterAlias.Aarch32Pc;
+
+                    rt     = (opMemMult.RegisterMask & pcMask) != 0 ? RegisterAlias.Aarch32Pc : 0;
+                    rn     =  opMemMult.Rn;
+                    wBack  =  opMemMult.PostOffset != 0;
+                    isLoad =  opMemMult.IsLoad;
+                }
+                else
+                {
+                    throw new NotImplementedException($"The type \"{opCode.GetType().Name}\" is not implemented on the decoder.");
+                }
+
+                if ((rt == RegisterAlias.Aarch32Pc && isLoad) ||
+                    (rn == RegisterAlias.Aarch32Pc && wBack))
+                {
+                    return true;
+                }
+            }
+
+            // Explicit branch instructions.
+            return opCode is IOpCode32BImm ||
+                   opCode is IOpCode32BReg;
+        }
+
+        private static bool IsCall(OpCode opCode)
+        {
+            // TODO (CQ): ARM32 support.
+            return opCode.Instruction.Name == InstName.Bl ||
+                   opCode.Instruction.Name == InstName.Blr;
+        }
+
+        private static bool IsException(OpCode opCode)
+        {
+            return opCode.Instruction.Name == InstName.Brk ||
+                   opCode.Instruction.Name == InstName.Svc ||
+                   opCode.Instruction.Name == InstName.Und;
+        }
+
+        public static OpCode DecodeOpCode(MemoryManager memory, ulong address, ExecutionMode mode)
+        {
+            int opCode = memory.ReadInt32((long)address);
+            // Look up the instruction descriptor and opcode class in the table for the current mode.
+            InstDescriptor inst;
+
+            Type type;
+
+            if (mode == ExecutionMode.Aarch64)
+            {
+                (inst, type) = OpCodeTable.GetInstA64(opCode);
+            }
+            else
+            {
+                if (mode == ExecutionMode.Aarch32Arm)
+                {
+                    (inst, type) = OpCodeTable.GetInstA32(opCode);
+                }
+                else /* if (mode == ExecutionMode.Aarch32Thumb) */
+                {
+                    (inst, type) = OpCodeTable.GetInstT32(opCode);
+                }
+            }
+            // When the table gives no opcode class, fall back to the base OpCode type.
+            if (type != null)
+            {
+                return MakeOpCode(inst, type, address, opCode);
+            }
+            else
+            {
+                return new OpCode(inst, address, opCode);
+            }
+        }
+
+        private static OpCode MakeOpCode(InstDescriptor inst, Type type, ulong address, int opCode)
+        {
+            MakeOp createInstance = _opActivators.GetOrAdd(type, CacheOpActivator);
+
+            return (OpCode)createInstance(inst, address, opCode);
+        }
+
+        private static MakeOp CacheOpActivator(Type type)
+        {
+            Type[] argTypes = new Type[] { typeof(InstDescriptor), typeof(ulong), typeof(int) };
+            // Emit a dynamic method equivalent to "return new <type>(inst, address, opCode);".
+            DynamicMethod mthd = new DynamicMethod($"Make{type.Name}", type, argTypes);
+
+            ILGenerator generator = mthd.GetILGenerator();
+
+            generator.Emit(OpCodes.Ldarg_0);
+            generator.Emit(OpCodes.Ldarg_1);
+            generator.Emit(OpCodes.Ldarg_2);
+            generator.Emit(OpCodes.Newobj, type.GetConstructor(argTypes));
+            generator.Emit(OpCodes.Ret);
+
+            return (MakeOp)mthd.CreateDelegate(typeof(MakeOp));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/DecoderHelper.cs b/ARMeilleure/Decoders/DecoderHelper.cs
new file mode 100644
index 000000000..3cbd49123
--- /dev/null
+++ b/ARMeilleure/Decoders/DecoderHelper.cs
@@ -0,0 +1,113 @@
+using ARMeilleure.Common;
+using System;
+
+namespace ARMeilleure.Decoders
+{
+    static class DecoderHelper
+    {
+        public struct BitMask // Value returned by DecodeBitMask.
+        {
+            public long WMask;       // Run of ones, rotated right within its element, then replicated.
+            public long TMask;       // Run of ((s - r) mod element size) + 1 ones, replicated.
+            public int  Pos;         // Raw 6-bit field from opcode bits 15:10.
+            public int  Shift;       // Raw 6-bit field from opcode bits 21:16.
+            public bool IsUndefined; // Set when DecodeBitMask rejected the encoding.
+            // Marker for rejected encodings; every other field keeps its default value.
+            public static BitMask Invalid => new BitMask { IsUndefined = true };
+        }
+
+        public static BitMask DecodeBitMask(int opCode, bool immediate)
+        {
+            // Raw fields: sf = bit 31, N = bit 22, immr = bits 21:16, imms = bits 15:10.
+            int sfBit = (opCode >> 31) & 1;
+            int nBit  = (opCode >> 22) & 1;
+            int immR  = (opCode >> 16) & 0x3f;
+            int immS  = (opCode >> 10) & 0x3f;
+
+            // HighestBitSet of N:NOT(imms) is used as log2 of the element size.
+            int log2Size = BitUtils.HighestBitSet((nBit << 6) | (~immS & 0x3f));
+
+            bool noElement   = log2Size < 1;
+            bool wideOn32Bit = sfBit == 0 && nBit != 0;
+            if (noElement || wideOn32Bit)
+            {
+                return BitMask.Invalid;
+            }
+
+            int elemSize = 1 << log2Size;
+            int elemMask = elemSize - 1;
+
+            int elemS = immS & elemMask;
+            int elemR = immR & elemMask;
+            // The immediate form rejects an imms value whose in-element part is all ones.
+            if (immediate && elemS == elemMask)
+            {
+                return BitMask.Invalid;
+            }
+
+            long wMask = BitUtils.FillWithOnes(elemS + 1);
+            long tMask = BitUtils.FillWithOnes(((elemS - elemR) & elemMask) + 1);
+
+            if (elemR > 0)
+            {
+                wMask = BitUtils.RotateRight(wMask, elemR, elemSize) & BitUtils.FillWithOnes(elemSize);
+            }
+
+            return new BitMask
+            {
+                WMask = BitUtils.Replicate(wMask, elemSize),
+                TMask = BitUtils.Replicate(tMask, elemSize),
+                Pos   = immS,
+                Shift = immR
+            };
+        }
+
+        public static long DecodeImm8Float(long imm, int size)
+        {
+            // size picks the layout: 0 = 8 exponent / 23 fraction bits, 1 = 11 exponent / 52 fraction bits.
+            if (size != 0 && size != 1)
+            {
+                throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            int expBits  = size == 0 ?  8 : 11;
+            int fracBits = size == 0 ? 23 : 52;
+
+            long signBit = (imm >> 7) & 1;
+            long expSel  = (imm >> 6) & 1;
+
+            // imm bits 5:0 land on the low two exponent bits and the top four fraction bits.
+            long result = (imm & 0x3f) << (fracBits - 4);
+
+            if (expSel != 0)
+            {
+                // Set every exponent bit between the low two and the topmost one.
+                result |= ((1L << (expBits - 3)) - 1) << (fracBits + 2);
+            }
+
+            result |= (expSel ^ 1) << (fracBits + expBits - 1);
+            result |= signBit << (fracBits + expBits);
+            return result;
+        }
+
+        // Reads opcode bits 23:0 as a signed 24-bit value and multiplies it by 4:
+        // the left shift drops the upper 8 bits, the arithmetic right shift sign-extends
+        // while leaving the field two bit positions higher than it started.
+        public static long DecodeImm24_2(int opCode) => (long)(opCode << 8) >> 6;
+
+        // Reads opcode bits 25:0 as a signed 26-bit value and multiplies it by 4:
+        // the left shift drops the upper 6 bits, the arithmetic right shift sign-extends
+        // while leaving the field two bit positions higher than it started.
+        public static long DecodeImm26_2(int opCode) => (long)(opCode << 6) >> 4;
+
+        // Reads opcode bits 23:5 as a signed 19-bit value and multiplies it by 4.
+        // Opcode bits 4:3 end up in the two lowest result bits after the shifts,
+        // so those two bits are cleared explicitly.
+        public static long DecodeImmS19_2(int opCode) => (long)((opCode << 8) >> 11) & ~3L;
+
+        // Reads opcode bits 18:5 as a signed 14-bit value and multiplies it by 4.
+        // Opcode bits 4:3 end up in the two lowest result bits after the shifts,
+        // so those two bits are cleared explicitly.
+        public static long DecodeImmS14_2(int opCode) => (long)((opCode << 13) >> 16) & ~3L;
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCode.cs b/ARMeilleure/Decoders/IOpCode.cs
new file mode 100644
index 000000000..37ba7a4c6
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCode.cs
@@ -0,0 +1,17 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.Decoders
+{
+    interface IOpCode // Read-only view shared by decoded instructions.
+    {
+        ulong Address { get; } // Address passed in when the instruction was decoded.
+        // Descriptor (instruction name and emitter delegate) of the instruction.
+        InstDescriptor Instruction { get; }
+        // Register size the instruction operates on.
+        RegisterSize RegisterSize { get; }
+        // Width in bits that corresponds to RegisterSize.
+        int GetBitsCount();
+        // Operand type that corresponds to RegisterSize.
+        OperandType GetOperandType();
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCode32.cs b/ARMeilleure/Decoders/IOpCode32.cs
new file mode 100644
index 000000000..126c10690
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCode32.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Decoders
+{
+    interface IOpCode32 : IOpCode // Members shared by the OpCode32 family of op codes.
+    {
+        Condition Cond { get; } // Condition field of the instruction.
+        // Program counter value as observed by the instruction (OpCode32 returns address + 2 * size).
+        uint GetPc();
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCode32Alu.cs b/ARMeilleure/Decoders/IOpCode32Alu.cs
new file mode 100644
index 000000000..72aea30ef
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCode32Alu.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+    interface IOpCode32Alu : IOpCode32
+    {
+        int Rd { get; } // Register index; OpCode32Alu reads it from bits 15:12.
+        int Rn { get; } // Register index; OpCode32Alu reads it from bits 19:16.
+        // OpCode32Alu sets this from bit 20 of the encoding.
+        bool SetFlags { get; }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCode32BImm.cs b/ARMeilleure/Decoders/IOpCode32BImm.cs
new file mode 100644
index 000000000..ec7db2c26
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCode32BImm.cs
@@ -0,0 +1,4 @@
+namespace ARMeilleure.Decoders
+{
+    interface IOpCode32BImm : IOpCode32, IOpCodeBImm { } // Combines both interfaces; adds no members of its own.
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCode32BReg.cs b/ARMeilleure/Decoders/IOpCode32BReg.cs
new file mode 100644
index 000000000..097ab4275
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCode32BReg.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+    interface IOpCode32BReg : IOpCode32
+    {
+        int Rm { get; } // Register index; OpCode32BReg reads it from bits 3:0.
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCode32Mem.cs b/ARMeilleure/Decoders/IOpCode32Mem.cs
new file mode 100644
index 000000000..0585ab53a
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCode32Mem.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+    interface IOpCode32Mem : IOpCode32
+    {
+        int Rt { get; } // Register index; OpCode32Mem reads it from bits 15:12.
+        int Rn { get; } // Register index; OpCode32Mem reads it from bits 19:16.
+        // Write-back flag; OpCode32Mem sets it when bit 24 is clear or bit 21 is set.
+        bool WBack { get; }
+        // Load flag; OpCode32Mem sets it from bit 20, and always for InstName.Ldrd.
+        bool IsLoad { get; }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCode32MemMult.cs b/ARMeilleure/Decoders/IOpCode32MemMult.cs
new file mode 100644
index 000000000..18fd3f6bf
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCode32MemMult.cs
@@ -0,0 +1,13 @@
+namespace ARMeilleure.Decoders
+{
+    interface IOpCode32MemMult : IOpCode32
+    {
+        int Rn { get; } // Register index; OpCode32MemMult reads it from bits 19:16.
+        // 16-bit register list; OpCode32MemMult reads it from bits 15:0.
+        int RegisterMask { get; }
+        // Signed total transfer size when bit 21 is set in OpCode32MemMult, zero otherwise.
+        int PostOffset { get; }
+        // Load flag; OpCode32MemMult sets it from bit 20.
+        bool IsLoad { get; }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeAlu.cs b/ARMeilleure/Decoders/IOpCodeAlu.cs
new file mode 100644
index 000000000..b8c28513d
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeAlu.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+    interface IOpCodeAlu : IOpCode
+    {
+        int Rd { get; } // Register index; OpCodeAlu reads it from bits 4:0.
+        int Rn { get; } // Register index; OpCodeAlu reads it from bits 9:5.
+        // Operation class; OpCodeAlu reads it from bits 25:24.
+        DataOp DataOp { get; }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeAluImm.cs b/ARMeilleure/Decoders/IOpCodeAluImm.cs
new file mode 100644
index 000000000..02f4c997b
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeAluImm.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+    interface IOpCodeAluImm : IOpCodeAlu
+    {
+        long Immediate { get; } // Decoded immediate operand.
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeAluRs.cs b/ARMeilleure/Decoders/IOpCodeAluRs.cs
new file mode 100644
index 000000000..22540b11a
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeAluRs.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+    interface IOpCodeAluRs : IOpCodeAlu
+    {
+        int Shift { get; } // Shift amount; OpCodeAluRs reads it from bits 15:10.
+        int Rm    { get; } // Register index; OpCodeAluRs reads it from bits 20:16.
+        // Kind of shift; OpCodeAluRs reads it from bits 23:22.
+        ShiftType ShiftType { get; }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeAluRx.cs b/ARMeilleure/Decoders/IOpCodeAluRx.cs
new file mode 100644
index 000000000..9d16be787
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeAluRx.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+    interface IOpCodeAluRx : IOpCodeAlu
+    {
+        int Shift { get; } // Shift amount; OpCodeAluRx reads it from bits 12:10.
+        int Rm    { get; } // Register index; OpCodeAluRx reads it from bits 20:16.
+        // Integer type selector; OpCodeAluRx reads it from bits 15:13.
+        IntType IntType { get; }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeBImm.cs b/ARMeilleure/Decoders/IOpCodeBImm.cs
new file mode 100644
index 000000000..958bff28d
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeBImm.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+    interface IOpCodeBImm : IOpCode
+    {
+        long Immediate { get; } // The OpCodeBImm subclasses store address + decoded offset here.
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeCond.cs b/ARMeilleure/Decoders/IOpCodeCond.cs
new file mode 100644
index 000000000..9808f7c08
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeCond.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+    interface IOpCodeCond : IOpCode
+    {
+        Condition Cond { get; } // Condition field decoded from the instruction.
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeLit.cs b/ARMeilleure/Decoders/IOpCodeLit.cs
new file mode 100644
index 000000000..74084a457
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeLit.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.Decoders
+{
+    interface IOpCodeLit : IOpCode // NOTE(review): no implementation is visible here; meanings below are inferred from names.
+    {
+        int  Rt        { get; } // Presumably the transfer register index.
+        long Immediate { get; } // Presumably the literal's address or offset; confirm against the implementation.
+        int  Size      { get; } // Presumably the access size selector.
+        bool Signed    { get; } // Presumably true when the loaded value is sign-extended.
+        bool Prefetch  { get; } // Presumably true for the prefetch form.
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IOpCodeSimd.cs b/ARMeilleure/Decoders/IOpCodeSimd.cs
new file mode 100644
index 000000000..056ef045c
--- /dev/null
+++ b/ARMeilleure/Decoders/IOpCodeSimd.cs
@@ -0,0 +1,7 @@
+namespace ARMeilleure.Decoders
+{
+    interface IOpCodeSimd : IOpCode
+    {
+        int Size { get; } // Presumably the element size selector; no implementation is visible here to confirm.
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/InstDescriptor.cs b/ARMeilleure/Decoders/InstDescriptor.cs
new file mode 100644
index 000000000..ee2b1c2e4
--- /dev/null
+++ b/ARMeilleure/Decoders/InstDescriptor.cs
@@ -0,0 +1,18 @@
+using ARMeilleure.Instructions;
+
+namespace ARMeilleure.Decoders
+{
+    struct InstDescriptor
+    {
+        // Instruction identifier and the delegate that emits code for it (null for Undefined).
+        public InstName    Name    { get; }
+        public InstEmitter Emitter { get; }
+        // Descriptor the decoders assign when an encoding fails validation.
+        public static InstDescriptor Undefined => new InstDescriptor(InstName.Und, null);
+        public InstDescriptor(InstName name, InstEmitter emitter)
+        {
+            Emitter = emitter;
+            Name    = name;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/InstEmitter.cs b/ARMeilleure/Decoders/InstEmitter.cs
new file mode 100644
index 000000000..a8b526569
--- /dev/null
+++ b/ARMeilleure/Decoders/InstEmitter.cs
@@ -0,0 +1,6 @@
+using ARMeilleure.Translation;
+
+namespace ARMeilleure.Decoders
+{
+    delegate void InstEmitter(ArmEmitterContext context); // Stored in InstDescriptor.Emitter; receives the emitter context.
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/IntType.cs b/ARMeilleure/Decoders/IntType.cs
new file mode 100644
index 000000000..244e96805
--- /dev/null
+++ b/ARMeilleure/Decoders/IntType.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+    enum IntType // 3-bit selector (OpCodeAluRx reads it from bits 15:13): low two bits give the width, bit 2 marks the signed variants.
+    {
+        UInt8  = 0,
+        UInt16 = 1,
+        UInt32 = 2,
+        UInt64 = 3,
+        Int8   = 4,
+        Int16  = 5,
+        Int32  = 6,
+        Int64  = 7
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode.cs b/ARMeilleure/Decoders/OpCode.cs
new file mode 100644
index 000000000..0bfc2456b
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode.cs
@@ -0,0 +1,48 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.Decoders
+{
+    class OpCode : IOpCode
+    {
+        // Address and raw encoding of the instruction, both fixed by the constructor.
+        public ulong Address   { get; private set; }
+        public int   RawOpCode { get; private set; }
+
+        // Encoded size of the instruction; defaults to 4 and may be changed by derived classes.
+        public int OpCodeSizeInBytes { get; protected set; } = 4;
+
+        public InstDescriptor Instruction { get; protected set; }
+
+        public RegisterSize RegisterSize { get; protected set; }
+
+        // RegisterSize starts out as Int64; derived classes reassign it as needed.
+        public OpCode(InstDescriptor inst, ulong address, int opCode)
+        {
+            Instruction  = inst;
+            Address      = address;
+            RawOpCode    = opCode;
+            RegisterSize = RegisterSize.Int64;
+        }
+
+        // The register width expressed in 16-bit pairs and in bytes.
+        public int GetPairsCount() => GetBitsCount() / 16;
+        public int GetBytesCount() => GetBitsCount() / 8;
+
+        // Width in bits for the current RegisterSize; any other value throws InvalidOperationException.
+        public int GetBitsCount()
+        {
+            switch (RegisterSize)
+            {
+                case RegisterSize.Int32:   return 32;
+                case RegisterSize.Int64:
+                case RegisterSize.Simd64:  return 64;
+                case RegisterSize.Simd128: return 128;
+                default:                   throw new InvalidOperationException();
+            }
+        }
+
+        // I32 for Int32 registers, I64 for every other register size.
+        public OperandType GetOperandType() => RegisterSize == RegisterSize.Int32 ? OperandType.I32 : OperandType.I64;
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32.cs b/ARMeilleure/Decoders/OpCode32.cs
new file mode 100644
index 000000000..20927d5e4
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32.cs
@@ -0,0 +1,21 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32 : OpCode
+    {
+        public Condition Cond { get; protected set; }
+
+        public OpCode32(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Bits 31:28 of the encoding hold the condition code.
+            Cond = (Condition)((opCode >> 28) & 0xf);
+            RegisterSize = RegisterSize.Int32;
+        }
+
+        public uint GetPc()
+        {
+            // PC reads as the address two instructions ahead, a legacy of the ARMv4
+            // pipeline that is kept for backwards compatibility.
+            return (uint)Address + 2 * (uint)OpCodeSizeInBytes;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32Alu.cs b/ARMeilleure/Decoders/OpCode32Alu.cs
new file mode 100644
index 000000000..8d03baddb
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32Alu.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32Alu : OpCode32, IOpCode32Alu
+    {
+        public int Rd { get; private set; }
+        public int Rn { get; private set; }
+
+        public bool SetFlags { get; private set; }
+        // Field layout: flag = bit 20, Rn = bits 19:16, Rd = bits 15:12.
+        public OpCode32Alu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            SetFlags = (opCode & (1 << 20)) != 0;
+
+            Rn = (opCode >> 16) & 0xf;
+            Rd = (opCode >> 12) & 0xf;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32AluImm.cs b/ARMeilleure/Decoders/OpCode32AluImm.cs
new file mode 100644
index 000000000..bba03e4d8
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32AluImm.cs
@@ -0,0 +1,21 @@
+using ARMeilleure.Common;
+
+namespace ARMeilleure.Decoders
+{
+    class OpCode32AluImm : OpCode32Alu
+    {
+        public int Immediate { get; private set; }
+
+        public bool IsRotated { get; private set; }
+        // Immediate is the 8-bit value in bits 7:0 rotated right by twice the 4-bit field in bits 11:8.
+        public OpCode32AluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int imm8   = opCode & 0xff;
+            int rotate = ((opCode >> 8) & 0xf) * 2;
+
+            IsRotated = rotate != 0;
+
+            Immediate = BitUtils.RotateRight(imm8, rotate, 32);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32AluRsImm.cs b/ARMeilleure/Decoders/OpCode32AluRsImm.cs
new file mode 100644
index 000000000..779d6cecf
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32AluRsImm.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32AluRsImm : OpCode32Alu
+    {
+        public int Rm  { get; private set; }
+        public int Imm { get; private set; }
+
+        public ShiftType ShiftType { get; private set; }
+        // Field layout: Imm = bits 11:7, shift type = bits 6:5, Rm = bits 3:0.
+        public OpCode32AluRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Imm = (opCode >> 7) & 0x1f;
+
+            ShiftType = (ShiftType)((opCode >> 5) & 3);
+            Rm        = opCode & 0xf;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32BImm.cs b/ARMeilleure/Decoders/OpCode32BImm.cs
new file mode 100644
index 000000000..ea6443bc8
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32BImm.cs
@@ -0,0 +1,27 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32BImm : OpCode32, IOpCode32BImm
+    {
+        public long Immediate { get; private set; }
+
+        public OpCode32BImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // A "never" condition marks the BLX (switch to Thumb) form of the encoding.
+            bool isBlx = Cond == Condition.Nv;
+            uint pc = GetPc();
+            if (!isBlx)
+            {
+                pc &= ~3u;
+            }
+
+            long target = pc + DecoderHelper.DecodeImm24_2(opCode);
+
+            if (isBlx)
+            {
+                // Bit 24 of the encoding is merged in as bit 1 of the target.
+                target |= (opCode >> 23) & 2;
+            }
+            Immediate = target;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32BReg.cs b/ARMeilleure/Decoders/OpCode32BReg.cs
new file mode 100644
index 000000000..ffb487070
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32BReg.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32BReg : OpCode32, IOpCode32BReg
+    {
+        public int Rm { get; private set; }
+        // Rm is taken from the low four bits of the encoding.
+        public OpCode32BReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rm = (opCode >> 0) & 0xf;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32Mem.cs b/ARMeilleure/Decoders/OpCode32Mem.cs
new file mode 100644
index 000000000..f4e88d592
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32Mem.cs
@@ -0,0 +1,37 @@
+using ARMeilleure.Instructions;
+
+namespace ARMeilleure.Decoders
+{
+    class OpCode32Mem : OpCode32, IOpCode32Mem
+    {
+        public int Rt { get; private set; }
+        public int Rn { get; private set; }
+        // Not assigned by this class; derived classes decode the offset.
+        public int Immediate { get; protected set; }
+
+        public bool Index        { get; private set; }
+        public bool Add          { get; private set; }
+        public bool WBack        { get; private set; }
+        public bool Unprivileged { get; private set; }
+
+        public bool IsLoad { get; private set; }
+
+        public OpCode32Mem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Control bits: P = bit 24, U = bit 23, W = bit 21, L = bit 20.
+            bool pBit = ((opCode >> 24) & 1) != 0;
+            bool uBit = ((opCode >> 23) & 1) != 0;
+            bool wBit = ((opCode >> 21) & 1) != 0;
+            bool lBit = ((opCode >> 20) & 1) != 0;
+
+            Rn = (opCode >> 16) & 0xf;
+            Rt = (opCode >> 12) & 0xf;
+            Index        = pBit;
+            Add          = uBit;
+            WBack        = wBit || !pBit;
+            Unprivileged = wBit && !pBit;
+            // Ldrd is treated as a load regardless of the L bit.
+            IsLoad = lBit || inst.Name == InstName.Ldrd;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32MemImm.cs b/ARMeilleure/Decoders/OpCode32MemImm.cs
new file mode 100644
index 000000000..f79c63197
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32MemImm.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32MemImm : OpCode32Mem // Memory op code whose offset is the 12-bit field in bits 11:0.
+    {
+        public OpCode32MemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Immediate = (opCode >> 0) & 0xfff;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32MemImm8.cs b/ARMeilleure/Decoders/OpCode32MemImm8.cs
new file mode 100644
index 000000000..08027fb75
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32MemImm8.cs
@@ -0,0 +1,13 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32MemImm8 : OpCode32Mem
+    {
+        public OpCode32MemImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // The 8-bit offset is split into two nibbles: bits 11:8 (high) and bits 3:0 (low).
+            int high = (opCode >> 4) & 0xf0;
+            int low  = opCode & 0x0f;
+            Immediate = high | low;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCode32MemMult.cs b/ARMeilleure/Decoders/OpCode32MemMult.cs
new file mode 100644
index 000000000..b61b50ea8
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32MemMult.cs
@@ -0,0 +1,55 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32MemMult : OpCode32, IOpCode32MemMult
+    {
+        public int Rn { get; private set; }
+
+        public int RegisterMask { get; private set; }
+        public int Offset       { get; private set; }
+        public int PostOffset   { get; private set; }
+
+        public bool IsLoad { get; private set; }
+
+        public OpCode32MemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Control bits: P = bit 24, U = bit 23, W = bit 21, L = bit 20.
+            bool before    = ((opCode >> 24) & 1) != 0;
+            bool increment = ((opCode >> 23) & 1) != 0;
+            bool writeBack = ((opCode >> 21) & 1) != 0;
+
+            IsLoad = ((opCode >> 20) & 1) != 0;
+
+            Rn = (opCode >> 16) & 0xf;
+
+            RegisterMask = opCode & 0xffff;
+
+            // Every bit set in the register list accounts for 4 bytes.
+            int totalBytes = 0;
+
+            // Clearing the lowest set bit on each pass visits every listed register exactly once.
+            for (int remaining = RegisterMask; remaining != 0; remaining &= remaining - 1)
+            {
+                totalBytes += 4;
+            }
+
+            // Offset of the first transferred word relative to the base register:
+            // decrementing forms start below the base, and the forms where U equals P
+            // (increment-before, decrement-after) are moved up by one word.
+            int start = increment ? 0 : -totalBytes;
+
+            if (increment == before)
+            {
+                start += 4;
+            }
+
+            Offset = start;
+
+            // With write-back the base moves by the signed total size;
+            // otherwise PostOffset keeps its default of zero.
+            if (writeBack)
+            {
+                PostOffset = increment ? totalBytes : -totalBytes;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeAdr.cs b/ARMeilleure/Decoders/OpCodeAdr.cs
new file mode 100644
index 000000000..fc8219f6c
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeAdr.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeAdr : OpCode
+    {
+        public int Rd { get; private set; }
+
+        public long Immediate { get; private set; }
+        // Immediate = signed field from bits 23:5 (times 4) with bits 30:29 as its two low bits.
+        public OpCodeAdr(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            long immLo = (opCode >> 29) & 3;
+            long immHi = DecoderHelper.DecodeImmS19_2(opCode);
+            Immediate = immHi | immLo;
+            Rd        = opCode & 0x1f;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeAlu.cs b/ARMeilleure/Decoders/OpCodeAlu.cs
new file mode 100644
index 000000000..171662a06
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeAlu.cs
@@ -0,0 +1,21 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeAlu : OpCode, IOpCodeAlu
+    {
+        public int Rd { get; protected set; }
+        public int Rn { get; private   set; }
+
+        public DataOp DataOp { get; private set; }
+
+        public OpCodeAlu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Bit 31 selects between the 64-bit and the 32-bit register size.
+            bool is64Bits = opCode < 0;
+            RegisterSize = is64Bits ? RegisterSize.Int64 : RegisterSize.Int32;
+
+            DataOp = (DataOp)((opCode >> 24) & 0x3);
+            Rn     = (opCode >> 5) & 0x1f;
+            Rd     = opCode & 0x1f;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeAluBinary.cs b/ARMeilleure/Decoders/OpCodeAluBinary.cs
new file mode 100644
index 000000000..2bdf1d798
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeAluBinary.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeAluBinary : OpCodeAlu
+    {
+        public int Rm { get; private set; }
+        // Rm is read from bits 20:16 of the encoding.
+        public OpCodeAluBinary(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rm = (opCode & 0x1f0000) >> 16;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeAluImm.cs b/ARMeilleure/Decoders/OpCodeAluImm.cs
new file mode 100644
index 000000000..35c83fcc3
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeAluImm.cs
@@ -0,0 +1,38 @@
+using System;
+
+namespace ARMeilleure.Decoders
+{
+    class OpCodeAluImm : OpCodeAlu, IOpCodeAluImm
+    {
+        public long Immediate { get; private set; }
+        // Decodes the immediate according to the DataOp class read by the base constructor.
+        public OpCodeAluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            if (DataOp == DataOp.Arithmetic)
+            {
+                // 12-bit value from bits 21:10, shifted left by 12 times the field in bits 23:22.
+                int shift = (opCode >> 22) & 3;
+                Immediate = (opCode >> 10) & 0xfff;
+                Immediate <<= shift * 12;
+            }
+            else if (DataOp == DataOp.Logical)
+            {
+                var bm = DecoderHelper.DecodeBitMask(opCode, true);
+
+                if (bm.IsUndefined)
+                {
+                    // Rejected bitmask encoding: mark the instruction undefined, Immediate stays zero.
+                    Instruction = InstDescriptor.Undefined;
+                    return;
+                }
+
+                Immediate = bm.WMask;
+            }
+            else
+            {
+                // Message first, parameter name second, so ParamName is populated correctly.
+                throw new ArgumentException($"Unsupported data operation \"{DataOp}\".", nameof(opCode));
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeAluRs.cs b/ARMeilleure/Decoders/OpCodeAluRs.cs
new file mode 100644
index 000000000..84fb6ac6d
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeAluRs.cs
@@ -0,0 +1,27 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeAluRs : OpCodeAlu, IOpCodeAluRs
+    {
+        public int Shift { get; private set; }
+        public int Rm    { get; private set; }
+
+        public ShiftType ShiftType { get; private set; }
+
+        public OpCodeAluRs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int amount = (opCode >> 10) & 0x3f;
+            // A shift amount that is not smaller than the register width is rejected;
+            // in that case the remaining fields keep their default values.
+            if (amount < GetBitsCount())
+            {
+                Shift     = amount;
+                Rm        = (opCode >> 16) & 0x1f;
+                ShiftType = (ShiftType)((opCode >> 22) & 0x3);
+            }
+            else
+            {
+                Instruction = InstDescriptor.Undefined;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeAluRx.cs b/ARMeilleure/Decoders/OpCodeAluRx.cs
new file mode 100644
index 000000000..5c8d427e8
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeAluRx.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeAluRx : OpCodeAlu, IOpCodeAluRx
+    {
+        public int Shift { get; private set; }
+        public int Rm    { get; private set; }
+
+        public IntType IntType { get; private set; }
+        // Field layout: Rm = bits 20:16, integer type = bits 15:13, shift amount = bits 12:10.
+        public OpCodeAluRx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rm      = (opCode >> 16) & 0x1f;
+            IntType = (IntType)((opCode >> 13) & 0x7);
+            Shift   = (opCode >> 10) & 0x7;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeBImm.cs b/ARMeilleure/Decoders/OpCodeBImm.cs
new file mode 100644
index 000000000..2821a6246
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeBImm.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeBImm : OpCode, IOpCodeBImm
+    {
+        public long Immediate { get; protected set; }
+        // Decodes nothing itself; Immediate is left for derived classes to assign.
+        public OpCodeBImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeBImmAl.cs b/ARMeilleure/Decoders/OpCodeBImmAl.cs
new file mode 100644
index 000000000..94bcea884
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeBImmAl.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeBImmAl : OpCodeBImm
+    {
+        public OpCodeBImmAl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Immediate = DecoderHelper.DecodeImm26_2(opCode) + (long)address; // Instruction address plus the signed offset.
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeBImmCmp.cs b/ARMeilleure/Decoders/OpCodeBImmCmp.cs
new file mode 100644
index 000000000..2b7c28341
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeBImmCmp.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeBImmCmp : OpCodeBImm
+    {
+        public int Rt { get; private set; }
+
+        public OpCodeBImmCmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Bit 31 selects between the 64-bit and the 32-bit register size.
+            bool is64Bits = opCode < 0;
+
+            RegisterSize = is64Bits ? RegisterSize.Int64 : RegisterSize.Int32;
+
+            Immediate = DecoderHelper.DecodeImmS19_2(opCode) + (long)address;
+            Rt        = opCode & 0x1f;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeBImmCond.cs b/ARMeilleure/Decoders/OpCodeBImmCond.cs
new file mode 100644
index 000000000..f898821ac
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeBImmCond.cs
@@ -0,0 +1,23 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeBImmCond : OpCodeBImm, IOpCodeCond
+    {
+        public Condition Cond { get; private set; }
+
+        public OpCodeBImmCond(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Bit 4 (o0) must be clear; otherwise the instruction is marked undefined
+            // and neither Cond nor Immediate is assigned.
+            bool o0Set = (opCode & 0x10) != 0;
+            if (o0Set)
+            {
+                Instruction = InstDescriptor.Undefined;
+            }
+            else
+            {
+                Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode);
+                Cond      = (Condition)(opCode & 0xf);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeBImmTest.cs b/ARMeilleure/Decoders/OpCodeBImmTest.cs
new file mode 100644
index 000000000..6687c2e7a
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeBImmTest.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeBImmTest : OpCodeBImm
+    {
+        public int Rt  { get; private set; }
+        public int Bit { get; private set; }
+
+        public OpCodeBImmTest(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // The bit index is six bits wide: encoding bit 31 is its top bit, bits 23:19 the rest.
+            int bitLow  = (opCode >> 19) & 0x1f;
+            int bitHigh = (opCode >> 26) & 0x20;
+            Bit       = bitHigh | bitLow;
+            Rt        = opCode & 0x1f;
+            Immediate = (long)address + DecoderHelper.DecodeImmS14_2(opCode);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeBReg.cs b/ARMeilleure/Decoders/OpCodeBReg.cs
new file mode 100644
index 000000000..00c51ec71
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeBReg.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeBReg : OpCode
+    {
+        public int Rn { get; private set; }
+
+        public OpCodeBReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Accepted only when op2 (bits 20:16) is all ones and op4 (bits 4:0) is all zeros.
+            bool op2Valid = ((opCode >> 16) & 0x1f) == 0b11111;
+            bool op4Valid = (opCode & 0x1f) == 0b00000;
+            if (op2Valid && op4Valid)
+            {
+                Rn = (opCode >> 5) & 0x1f;
+            }
+            else
+            {
+                Instruction = InstDescriptor.Undefined;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeBfm.cs b/ARMeilleure/Decoders/OpCodeBfm.cs
new file mode 100644
index 000000000..2ae8edf56
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeBfm.cs
@@ -0,0 +1,27 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeBfm : OpCodeAlu
+    {
+        public long WMask { get; private set; }
+        public long TMask { get; private set; }
+        public int  Pos   { get; private set; }
+        public int  Shift { get; private set; }
+
+        public OpCodeBfm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Decoded with immediate = false, so an all-ones run inside the element is not rejected.
+            DecoderHelper.BitMask mask = DecoderHelper.DecodeBitMask(opCode, false);
+            if (!mask.IsUndefined)
+            {
+                Shift = mask.Shift;
+                Pos   = mask.Pos;
+                TMask = mask.TMask;
+                WMask = mask.WMask;
+            }
+            else
+            {
+                Instruction = InstDescriptor.Undefined;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeCcmp.cs b/ARMeilleure/Decoders/OpCodeCcmp.cs
new file mode 100644
index 000000000..c302f6a32
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeCcmp.cs
@@ -0,0 +1,30 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Decoders
+{
+    // Shared decoding for the Ccmp opcode classes; bits 20:16 are kept raw in RmImm for subclasses.
+    class OpCodeCcmp : OpCodeAlu, IOpCodeCond
+    {
+        public    int Nzcv { get; private set; }
+        protected int RmImm;
+
+        public Condition Cond { get; private set; }
+
+        public OpCodeCcmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Bit 4 (o3) has to be clear for the encoding to be valid.
+            if (((opCode >> 4) & 1) != 0)
+            {
+                Instruction = InstDescriptor.Undefined;
+                return;
+            }
+
+            RmImm = (opCode >> 16) & 0x1f;
+            Cond  = (Condition)((opCode >> 12) & 0xf);
+            Nzcv  = opCode & 0xf;
+
+            // Rd always reports RegisterAlias.Zr for these opcodes.
+            Rd = RegisterAlias.Zr;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeCcmpImm.cs b/ARMeilleure/Decoders/OpCodeCcmpImm.cs
new file mode 100644
index 000000000..4a2d01f46
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeCcmpImm.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeCcmpImm : OpCodeCcmp, IOpCodeAluImm
+    {
+        public OpCodeCcmpImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { }
+        // The immediate operand is the RmImm field inherited from OpCodeCcmp, widened to long.
+        public long Immediate { get { return RmImm; } }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeCcmpReg.cs b/ARMeilleure/Decoders/OpCodeCcmpReg.cs
new file mode 100644
index 000000000..0e2b922cf
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeCcmpReg.cs
@@ -0,0 +1,13 @@
+namespace ARMeilleure.Decoders
+{
+    // Register form: the inherited RmImm field is exposed as the register index Rm.
+    class OpCodeCcmpReg : OpCodeCcmp, IOpCodeAluRs
+    {
+        public OpCodeCcmpReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { }
+
+        public int Rm { get { return RmImm; } }
+        // The shift description is constant: ShiftType.Lsl with an amount of 0.
+        public int       Shift     { get { return 0; } }
+        public ShiftType ShiftType { get { return ShiftType.Lsl; } }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeCsel.cs b/ARMeilleure/Decoders/OpCodeCsel.cs
new file mode 100644
index 000000000..fd07e6fd4
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeCsel.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+    // ALU opcode with an extra register field (Rm) and a condition field (Cond).
+    class OpCodeCsel : OpCodeAlu, IOpCodeCond
+    {
+        public int       Rm   { get; private set; }
+        public Condition Cond { get; private set; }
+
+        public OpCodeCsel(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Cond = (Condition)((opCode >> 12) & 0xf); // bits 15:12
+            Rm   = (opCode >> 16) & 0x1f;             // bits 20:16
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeException.cs b/ARMeilleure/Decoders/OpCodeException.cs
new file mode 100644
index 000000000..9781c543b
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeException.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeException : OpCode
+    {
+        public int Id { get; private set; } // 16-bit value from bits 20:5
+
+        public OpCodeException(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Id = (opCode >> 5) & ushort.MaxValue;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMem.cs b/ARMeilleure/Decoders/OpCodeMem.cs
new file mode 100644
index 000000000..5a7ab482a
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMem.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeMem : OpCode
+    {
+        public int  Rt       { get; protected set; } // bits 4:0
+        public int  Rn       { get; protected set; } // bits 9:5
+        public int  Size     { get; protected set; } // bits 31:30
+        public bool Extend64 { get; protected set; } // not assigned by this constructor
+
+        public OpCodeMem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Size = (opCode >> 30) & 0x3;
+            Rn   = (opCode >>  5) & 0x1f;
+            Rt   =  opCode        & 0x1f;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMemEx.cs b/ARMeilleure/Decoders/OpCodeMemEx.cs
new file mode 100644
index 000000000..5956f3672
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMemEx.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeMemEx : OpCodeMem
+    {
+        public int Rt2 { get; private set; } // bits 14:10
+        public int Rs  { get; private set; } // bits 20:16
+
+        public OpCodeMemEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rs  = (opCode >> 16) & 0x1f;
+            Rt2 = (opCode >> 10) & 0x1f;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMemImm.cs b/ARMeilleure/Decoders/OpCodeMemImm.cs
new file mode 100644
index 000000000..517434f29
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMemImm.cs
@@ -0,0 +1,51 @@
+namespace ARMeilleure.Decoders
+{
+    // Memory opcode with an immediate offset, covering the 9-bit signed and 12-bit unsigned encodings.
+    class OpCodeMemImm : OpCodeMem
+    {
+        public    long Immediate { get; protected set; }
+        public    bool WBack     { get; protected set; }
+        public    bool PostIdx   { get; protected set; }
+        protected bool Unscaled  { get; private   set; }
+
+        // Values 0 to 3 mirror bits 11:10 of the opcode; Unsigned marks the 12-bit unsigned form.
+        private enum MemOp
+        {
+            Unscaled     = 0,
+            PostIndexed  = 1,
+            Unprivileged = 2,
+            PreIndexed   = 3,
+            Unsigned
+        }
+
+        public OpCodeMemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Extend64 = ((opCode >> 22) & 3) == 2;
+
+            // When bit 24 is set this is the unsigned 12-bit encoding, where bits 11:10
+            // belong to the offset itself and therefore cannot select an addressing type.
+            bool isUnsigned = ((opCode >> 24) & 1) != 0;
+
+            MemOp type = isUnsigned ? MemOp.Unsigned : (MemOp)((opCode >> 10) & 3);
+
+            PostIdx  = type == MemOp.PostIndexed;
+            Unscaled = type == MemOp.Unscaled ||
+                       type == MemOp.Unprivileged;
+
+            // Only the pre and post indexed forms write back; the unscaled and unprivileged
+            // forms use the 9-bit signed offset but do not write back.
+            WBack = type == MemOp.PreIndexed || type == MemOp.PostIndexed;
+
+            if (isUnsigned)
+            {
+                // 12-bit unsigned offset, shifted left by Size.
+                Immediate = ((opCode >> 10) & 0xfff) << Size;
+            }
+            else
+            {
+                // 9-bit signed offset (bits 20:12), sign extended by the shift pair.
+                Immediate = (opCode << 11) >> 23;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMemLit.cs b/ARMeilleure/Decoders/OpCodeMemLit.cs
new file mode 100644
index 000000000..b80585cb4
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMemLit.cs
@@ -0,0 +1,26 @@
+namespace ARMeilleure.Decoders
+{
+    // Memory opcode whose target is the instruction address plus a decoded 19-bit offset.
+    class OpCodeMemLit : OpCode, IOpCodeLit
+    {
+        public int  Rt        { get; private set; }
+        public long Immediate { get; private set; }
+        public int  Size      { get; private set; }
+        public bool Signed    { get; private set; }
+        public bool Prefetch  { get; private set; }
+
+        public OpCodeMemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int opc = (opCode >> 30) & 3;
+
+            Rt        = opCode & 0x1f;
+            Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode);
+
+            // opc (bits 31:30): 0 and 2 use size 2 (2 being signed), 1 uses size 3, 3 is a prefetch.
+            Signed   = opc == 2;
+            Prefetch = opc == 3;
+
+            Size = Prefetch ? 0 : (opc == 1 ? 3 : 2);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMemPair.cs b/ARMeilleure/Decoders/OpCodeMemPair.cs
new file mode 100644
index 000000000..ea329a1db
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMemPair.cs
@@ -0,0 +1,23 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeMemPair : OpCodeMemImm
+    {
+        public int Rt2 { get; private set; } // bits 14:10
+
+        public OpCodeMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int index = (opCode >> 23) & 0x3; // bits 24:23
+            int opc   = (opCode >> 30) & 0x3; // bits 31:30
+
+            Rt2      = (opCode >> 10) & 0x1f;
+            WBack    = (index & 1) != 0;
+            PostIdx  = index == 1;
+            Extend64 = opc == 1;
+            Size     = (opc >> 1) | 2;
+            DecodeImm(opCode);
+        }
+
+        // Sign extends the 7-bit field at bits 21:15 and shifts it left by the current Size.
+        protected void DecodeImm(int opCode) => Immediate = ((long)(opCode >> 15) << 57) >> (57 - Size);
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMemReg.cs b/ARMeilleure/Decoders/OpCodeMemReg.cs
new file mode 100644
index 000000000..f5c2f9911
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMemReg.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.Decoders
+{
+    // Memory opcode with a register operand Rm plus the IntType and Shift fields.
+    class OpCodeMemReg : OpCodeMem
+    {
+        public bool    Shift   { get; private set; } // bit 12
+        public IntType IntType { get; private set; } // bits 15:13
+        public int     Rm      { get; private set; } // bits 20:16
+
+        public OpCodeMemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rm       = (opCode >> 16) & 0x1f;
+            IntType  = (IntType)((opCode >> 13) & 0x7);
+            Shift    = (opCode & (1 << 12)) != 0;
+            Extend64 = ((opCode >> 22) & 0x3) == 2;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMov.cs b/ARMeilleure/Decoders/OpCodeMov.cs
new file mode 100644
index 000000000..b65178cff
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMov.cs
@@ -0,0 +1,36 @@
+namespace ARMeilleure.Decoders
+{
+    // Opcode holding a 16-bit immediate that is stored already shifted by 0, 16, 32 or 48 bits.
+    class OpCodeMov : OpCode
+    {
+        public int  Rd        { get; private set; }
+        public long Immediate { get; private set; }
+        public int  Bit       { get; private set; }
+
+        public OpCodeMov(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            bool is64Bits = ((opCode >> 31) & 1) != 0;
+
+            // Bit 22 set means a shift of 32 or 48 bits, which is only accepted
+            // when bit 31 (sf) selects the 64-bit form.
+            if (!is64Bits && ((opCode >> 22) & 1) != 0)
+            {
+                Instruction = InstDescriptor.Undefined;
+                return;
+            }
+
+            long imm16 = (opCode >> 5) & 0xffff;
+            int  hw    = (opCode >> 21) & 0x3;
+
+            // Bit stores the shift amount in bits (0, 16, 32 or 48),
+            // and Immediate already has that shift applied to it.
+            Rd        = opCode & 0x1f;
+            Bit       = hw << 4;
+            Immediate = imm16 << Bit;
+
+            RegisterSize = is64Bits
+                ? RegisterSize.Int64
+                : RegisterSize.Int32;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeMul.cs b/ARMeilleure/Decoders/OpCodeMul.cs
new file mode 100644
index 000000000..3eb4dc97c
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeMul.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeMul : OpCodeAlu
+    {
+        public int Rm { get; private set; } // bits 20:16
+        public int Ra { get; private set; } // bits 14:10
+
+        public OpCodeMul(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rm = (opCode >> 16) & 0x1f;
+            Ra = (opCode >> 10) & 0x1f;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimd.cs b/ARMeilleure/Decoders/OpCodeSimd.cs
new file mode 100644
index 000000000..a258446c1
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimd.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+    // Base SIMD opcode: decodes Rd, Rn, Opc and Size, and picks Simd64 or Simd128 from bit 30.
+    class OpCodeSimd : OpCode, IOpCodeSimd
+    {
+        public int Rd   { get; private   set; }
+        public int Rn   { get; private   set; }
+        public int Opc  { get; private   set; }
+        public int Size { get; protected set; }
+
+        public OpCodeSimd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            bool q = (opCode & (1 << 30)) != 0;
+            RegisterSize = q ? RegisterSize.Simd128 : RegisterSize.Simd64;
+
+            Size = (opCode >> 22) & 0x3;
+            Opc  = (opCode >> 15) & 0x3;
+            Rn   = (opCode >>  5) & 0x1f;
+            Rd   =  opCode        & 0x1f;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdCvt.cs b/ARMeilleure/Decoders/OpCodeSimdCvt.cs
new file mode 100644
index 000000000..15658bb89
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdCvt.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD opcode that adds FBits and reports an integer register size instead of a vector one.
+    class OpCodeSimdCvt : OpCodeSimd
+    {
+        public int FBits { get; private set; }
+
+        public OpCodeSimdCvt(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // FBits is 64 minus the 6-bit scale field found at bits 15:10.
+            FBits = 64 - ((opCode >> 10) & 0x3f);
+
+            // Bit 31 (sf) chooses between the 64-bit and 32-bit integer sizes.
+            bool is64Bits = ((opCode >> 31) & 0x1) != 0;
+
+            RegisterSize = is64Bits ? RegisterSize.Int64 : RegisterSize.Int32;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdExt.cs b/ARMeilleure/Decoders/OpCodeSimdExt.cs
new file mode 100644
index 000000000..d585449c1
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdExt.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeSimdExt : OpCodeSimdReg
+    {
+        public int Imm4 { get; private set; } // bits 14:11
+
+        public OpCodeSimdExt(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Imm4 = (opCode & 0x7800) >> 11;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdFcond.cs b/ARMeilleure/Decoders/OpCodeSimdFcond.cs
new file mode 100644
index 000000000..9e7a5f3bf
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdFcond.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD register opcode extended with a condition field and a 4-bit Nzcv field.
+    class OpCodeSimdFcond : OpCodeSimdReg, IOpCodeCond
+    {
+        public int       Nzcv { get; private set; }
+        public Condition Cond { get; private set; }
+
+        public OpCodeSimdFcond(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Cond = (Condition)((opCode >> 12) & 0xf); // bits 15:12
+            Nzcv = opCode & 0xf;                      // bits 3:0
+        }
+    }
+}
diff --git a/ARMeilleure/Decoders/OpCodeSimdFmov.cs b/ARMeilleure/Decoders/OpCodeSimdFmov.cs
new file mode 100644
index 000000000..61a3f077d
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdFmov.cs
@@ -0,0 +1,31 @@
+namespace ARMeilleure.Decoders
+{
+    // Opcode with an 8-bit encoded immediate that is expanded by DecoderHelper.DecodeImm8Float.
+    class OpCodeSimdFmov : OpCode, IOpCodeSimd
+    {
+        public int  Rd        { get; private set; }
+        public long Immediate { get; private set; }
+        public int  Size      { get; private set; }
+
+        public OpCodeSimdFmov(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int type = (opCode >> 22) & 0x3;
+
+            // Bits 9:5 must be zero, and only types 0 and 1 are accepted.
+            bool valid = ((opCode >> 5) & 0x1f) == 0b00000 && type <= 1;
+
+            if (!valid)
+            {
+                Instruction = InstDescriptor.Undefined;
+                return;
+            }
+
+            // The raw 8-bit immediate is held in bits 20:13.
+            long imm8 = (opCode >> 13) & 0xff;
+
+            Rd        = opCode & 0x1f;
+            Size      = type;
+            Immediate = DecoderHelper.DecodeImm8Float(imm8, type);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdImm.cs b/ARMeilleure/Decoders/OpCodeSimdImm.cs
new file mode 100644
index 000000000..ecad906d9
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdImm.cs
@@ -0,0 +1,98 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD opcode carrying an 8-bit immediate that is expanded according to cmode (bits 15:12)
+    // and op (bit 29). Immediate receives the expanded value, Size the matching element size.
+    class OpCodeSimdImm : OpCode, IOpCodeSimd
+    {
+        public int  Rd        { get; private set; }
+        public long Immediate { get; private set; }
+        public int  Size      { get; private set; }
+
+        public OpCodeSimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rd = opCode & 0x1f;
+
+            int cMode = (opCode >> 12) & 0xf;
+            int op    = (opCode >> 29) & 0x1;
+
+            int modeLow  = cMode &  1;
+            int modeHigh = cMode >> 1;
+
+            // The 8-bit immediate is split between bits 9:5 (low part) and bits 18:16 (high part).
+            long imm;
+
+            imm  = ((uint)opCode >>  5) & 0x1f;
+            imm |= ((uint)opCode >> 11) & 0xe0;
+
+            if (modeHigh == 0b111)
+            {
+                // Size is op (0 or 1) for the floating point forms, and 3 (64-bits) otherwise.
+                Size = modeLow != 0 ? op : 3;
+
+                switch (op | (modeLow << 1))
+                {
+                    case 0:
+                        // 64-bits Immediate.
+                        // Transform abcd efgh into abcd efgh abcd efgh ...
+                        imm = (long)((ulong)imm * 0x0101010101010101);
+                        break;
+
+                    case 1:
+                        // 64-bits Immediate.
+                        // Transform abcd efgh into aaaa aaaa bbbb bbbb ...
+                        // Reverse the 8 bits, move one bit to the top of each byte, then fill every byte.
+                        imm = (imm & 0xf0) >> 4 | (imm & 0x0f) << 4;
+                        imm = (imm & 0xcc) >> 2 | (imm & 0x33) << 2;
+                        imm = (imm & 0xaa) >> 1 | (imm & 0x55) << 1;
+
+                        imm = (long)((ulong)imm * 0x8040201008040201);
+                        imm = (long)((ulong)imm & 0x8080808080808080);
+
+                        imm |= imm >> 4;
+                        imm |= imm >> 2;
+                        imm |= imm >> 1;
+                        break;
+
+                    case 2:
+                    case 3:
+                        // Floating point Immediate.
+                        imm = DecoderHelper.DecodeImm8Float(imm, Size);
+                        break;
+                }
+            }
+            else if ((modeHigh & 0b110) == 0b100)
+            {
+                // 16-bits shifted Immediate.
+                Size = 1;
+                imm <<= (modeHigh & 1) << 3;
+            }
+            else if ((modeHigh & 0b100) == 0b000)
+            {
+                // 32-bits shifted Immediate.
+                Size = 2;
+                imm <<= modeHigh << 3;
+            }
+            else
+            {
+                // modeHigh is 0b110 here, the only value left: 32-bits shifted Immediate (fill with ones).
+                // The 8-bits Immediate (cmode 0b1110) is already handled by the first branch above.
+                Size = 2;
+                imm = ShlOnes(imm, 8 << modeLow);
+            }
+
+            Immediate = imm;
+
+            // Bit 30 selects between the 128-bit and 64-bit vector sizes.
+            RegisterSize = ((opCode >> 30) & 1) != 0
+                ? RegisterSize.Simd128
+                : RegisterSize.Simd64;
+        }
+
+        // Shifts value left by shift bits and sets the vacated low bits to one.
+        // A shift of zero is special cased because C# masks shift counts, so ">> 64" would keep the mask.
+        private static long ShlOnes(long value, int shift)
+        {
+            return shift != 0 ? (value << shift | (long)(ulong.MaxValue >> (64 - shift))) : value;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdIns.cs b/ARMeilleure/Decoders/OpCodeSimdIns.cs
new file mode 100644
index 000000000..78328adb5
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdIns.cs
@@ -0,0 +1,34 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD opcode with source and destination element indices; Size comes from the lowest set bit of bits 20:16.
+    class OpCodeSimdIns : OpCodeSimd
+    {
+        public int SrcIndex { get; private set; }
+        public int DstIndex { get; private set; }
+
+        public OpCodeSimdIns(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int imm5 = (opCode >> 16) & 0x1f;
+
+            if (imm5 == 0b10000)
+            {
+                Instruction = InstDescriptor.Undefined;
+                return;
+            }
+
+            // imm5 & -imm5 isolates the lowest set bit, and its position becomes the element size.
+            int size;
+            switch (imm5 & -imm5)
+            {
+                case 2:  size = 1; break;
+                case 4:  size = 2; break;
+                case 8:  size = 3; break;
+                default: size = 0; break; // Lowest set bit is bit 0, or imm5 is zero.
+            }
+
+            Size     = size;
+            SrcIndex = ((opCode >> 11) & 0xf) >> size;
+            DstIndex = imm5 >> (size + 1);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdMemImm.cs b/ARMeilleure/Decoders/OpCodeSimdMemImm.cs
new file mode 100644
index 000000000..6b9e66d93
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdMemImm.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeSimdMemImm : OpCodeMemImm, IOpCodeSimd
+    {
+        public OpCodeSimdMemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Extend64 = false;
+            Size     = Size | ((opCode >> 21) & 4); // bit 23 supplies bit 2 of Size
+
+            // With neither write back nor the unscaled form, sizes of 4 and above get 4 more bits of shift.
+            if (!(WBack || Unscaled) && Size >= 4)
+            {
+                Immediate <<= 4;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdMemLit.cs b/ARMeilleure/Decoders/OpCodeSimdMemLit.cs
new file mode 100644
index 000000000..607df1392
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdMemLit.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD literal memory opcode; Signed and Prefetch are always false here.
+    class OpCodeSimdMemLit : OpCode, IOpCodeSimd, IOpCodeLit
+    {
+        public int  Rt        { get; private set; }
+        public long Immediate { get; private set; }
+        public int  Size      { get; private set; }
+        public bool Signed    { get { return false; } }
+        public bool Prefetch  { get { return false; } }
+
+        public OpCodeSimdMemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int opc = (opCode >> 30) & 3;
+
+            if (opc != 3)
+            {
+                // Sizes 2, 3 and 4 are selected by opc values 0, 1 and 2.
+                Size = opc + 2;
+                Rt   = opCode & 0x1f;
+                Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode);
+            }
+            else
+            {
+                Instruction = InstDescriptor.Undefined;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdMemMs.cs b/ARMeilleure/Decoders/OpCodeSimdMemMs.cs
new file mode 100644
index 000000000..9fa5ff42c
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdMemMs.cs
@@ -0,0 +1,46 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD memory opcode where bits 15:12 select the Reps and SElems counts.
+    class OpCodeSimdMemMs : OpCodeMemReg, IOpCodeSimd
+    {
+        public int  Reps   { get; private set; }
+        public int  SElems { get; private set; }
+        public int  Elems  { get; private set; }
+        public bool WBack  { get; private set; }
+
+        public OpCodeSimdMemMs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int type = (opCode >> 12) & 0xf;
+
+            // Every accepted type maps to a fixed (Reps, SElems) pair; anything else is undefined.
+            switch (type)
+            {
+                case 0b0000: SElems = 4; Reps = 1; break;
+                case 0b0010: SElems = 1; Reps = 4; break;
+                case 0b0100: SElems = 3; Reps = 1; break;
+                case 0b0110: SElems = 1; Reps = 3; break;
+                case 0b0111: SElems = 1; Reps = 1; break;
+                case 0b1000: SElems = 2; Reps = 1; break;
+                case 0b1010: SElems = 1; Reps = 2; break;
+                default: Instruction = InstDescriptor.Undefined; return;
+            }
+
+            bool is128Bits = ((opCode >> 30) & 1) != 0;
+
+            Size  = (opCode >> 10) & 3;
+            WBack = ((opCode >> 23) & 1) != 0;
+
+            // The 64-bit vector form rejects size 3 together with more than one structure element.
+            if (Size == 3 && SElems != 1 && !is128Bits)
+            {
+                Instruction = InstDescriptor.Undefined;
+                return;
+            }
+
+            Extend64     = false;
+            RegisterSize = is128Bits ? RegisterSize.Simd128 : RegisterSize.Simd64;
+
+            Elems = (GetBitsCount() >> 3) >> Size;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdMemPair.cs b/ARMeilleure/Decoders/OpCodeSimdMemPair.cs
new file mode 100644
index 000000000..a4af49d02
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdMemPair.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeSimdMemPair : OpCodeMemPair, IOpCodeSimd
+    {
+        public OpCodeSimdMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Extend64 = false;
+            // Size is recomputed from bits 31:30, so the offset has to be decoded
+            // again because DecodeImm shifts it by the current Size.
+            Size = 2 + ((opCode >> 30) & 3);
+            DecodeImm(opCode);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdMemReg.cs b/ARMeilleure/Decoders/OpCodeSimdMemReg.cs
new file mode 100644
index 000000000..7b783d63d
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdMemReg.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeSimdMemReg : OpCodeMemReg, IOpCodeSimd
+    {
+        public OpCodeSimdMemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Bit 23 of the opcode supplies bit 2 of Size.
+            Size     = Size | ((opCode >> 21) & 4);
+            Extend64 = false;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdMemSs.cs b/ARMeilleure/Decoders/OpCodeSimdMemSs.cs
new file mode 100644
index 000000000..302decbcc
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdMemSs.cs
@@ -0,0 +1,95 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD memory opcode that exposes an element Index and a Replicate flag,
+    // both derived from the scale (bits 15:14), size (bits 11:10), s, l and q fields.
+    class OpCodeSimdMemSs : OpCodeMemReg, IOpCodeSimd
+    {
+        public int  SElems    { get; private set; }
+        public int  Index     { get; private set; }
+        public bool Replicate { get; private set; }
+        public bool WBack     { get; private set; }
+
+        public OpCodeSimdMemSs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int size  = (opCode >> 10) & 3;
+            int s     = (opCode >> 12) & 1;
+            int scale = (opCode >> 14) & 3;
+            int l     = (opCode >> 22) & 1;
+            int q     = (opCode >> 30) & 1;
+
+            // Bit 13 and bit 21 form a 2-bit value; the element count is that value plus one.
+            int sElems = (((opCode >> 12) & 2) | ((opCode >> 21) & 1)) + 1;
+
+            // The index starts out as q:s:size, and drops low bits for the larger scales.
+            int index = (q << 3) | (s << 2) | size;
+
+            bool valid     = true;
+            bool replicate = false;
+
+            switch (scale)
+            {
+                case 1:
+                {
+                    // The lowest size bit must be clear.
+                    valid = (size & 1) == 0;
+
+                    index >>= 1;
+
+                    break;
+                }
+
+                case 2:
+                {
+                    if (size == 0)
+                    {
+                        index >>= 2;
+                    }
+                    else if (size == 1 && s == 0)
+                    {
+                        // This combination is promoted to scale 3.
+                        index >>= 3;
+
+                        scale = 3;
+                    }
+                    else
+                    {
+                        valid = false;
+                    }
+
+                    break;
+                }
+
+                case 3:
+                {
+                    // Replicating form: needs bit 22 set and s clear, and takes its scale from size.
+                    valid = l != 0 && s == 0;
+
+                    scale     = size;
+                    replicate = true;
+
+                    break;
+                }
+            }
+
+            // None of the decoded fields are stored when the encoding is rejected.
+            if (!valid)
+            {
+                Instruction = InstDescriptor.Undefined;
+
+                return;
+            }
+
+            Index     = index;
+            SElems    = sElems;
+            Size      = scale;
+            Replicate = replicate;
+
+            WBack    = ((opCode >> 23) & 1) != 0;
+            Extend64 = false;
+
+            RegisterSize = q != 0
+                ? RegisterSize.Simd128
+                : RegisterSize.Simd64;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdReg.cs b/ARMeilleure/Decoders/OpCodeSimdReg.cs
new file mode 100644
index 000000000..d076806a6
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdReg.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCodeSimdReg : OpCodeSimd
+    {
+        public bool Bit3 { get; private   set; } // bit 3
+        public int  Ra   { get; private   set; } // bits 14:10
+        public int  Rm   { get; protected set; } // bits 20:16
+
+        public OpCodeSimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rm   = (opCode >> 16) & 0x1f;
+            Ra   = (opCode >> 10) & 0x1f;
+            Bit3 = (opCode & 0b1000) != 0;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdRegElem.cs b/ARMeilleure/Decoders/OpCodeSimdRegElem.cs
new file mode 100644
index 000000000..d2f1583d2
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdRegElem.cs
@@ -0,0 +1,29 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD register opcode with an element index built from bit 11 (h), bit 21 (l) and bit 20 (m).
+    class OpCodeSimdRegElem : OpCodeSimdReg
+    {
+        public int Index { get; private set; }
+
+        public OpCodeSimdRegElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int h = (opCode >> 11) & 1;
+            int l = (opCode >> 21) & 1;
+            int m = (opCode >> 20) & 1;
+
+            if (Size == 1)
+            {
+                Index = (h << 2) | (l << 1) | m; // h:l:m
+                Rm   &= 0xf;                     // bit 20 went into the index, so Rm keeps its low 4 bits
+            }
+            else if (Size == 2)
+            {
+                Index = (h << 1) | l; // h:l
+            }
+            else
+            {
+                Instruction = InstDescriptor.Undefined;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs b/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs
new file mode 100644
index 000000000..365b77172
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs
@@ -0,0 +1,31 @@
+namespace ARMeilleure.Decoders
+{
+    // SIMD register opcode whose element index depends on the sz:L pair (bits 22:21) and on H (bit 11).
+    class OpCodeSimdRegElemF : OpCodeSimdReg
+    {
+        public int Index { get; private set; }
+
+        public OpCodeSimdRegElemF(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int h  = (opCode >> 11) & 1;
+            int l  = (opCode >> 21) & 1;
+            int sz = (opCode >> 22) & 1;
+
+            if (sz == 0)
+            {
+                // Index is H:L, giving 0 to 3.
+                Index = (h << 1) | l;
+            }
+            else if (l == 0)
+            {
+                // Index is H alone, giving 0 or 1.
+                Index = h;
+            }
+            else
+            {
+                // sz:L == 0b11 is rejected.
+                Instruction = InstDescriptor.Undefined;
+            }
+        }
+    }
+}
diff --git a/ARMeilleure/Decoders/OpCodeSimdShImm.cs b/ARMeilleure/Decoders/OpCodeSimdShImm.cs
new file mode 100644
index 000000000..d260c4b3e
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdShImm.cs
@@ -0,0 +1,16 @@
+using ARMeilleure.Common;
+
+namespace ARMeilleure.Decoders
+{
+    class OpCodeSimdShImm : OpCodeSimd
+    {
+        public int Imm { get; private set; } // bits 22:16
+
+        public OpCodeSimdShImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            // Size comes from BitUtils.HighestBitSetNibble applied to the top 4 bits of Imm (bits 22:19).
+            Size = BitUtils.HighestBitSetNibble((opCode >> 19) & 0xf);
+            Imm  = (opCode >> 16) & 0x7f;
+        }
+    }
+}
diff --git a/ARMeilleure/Decoders/OpCodeSimdTbl.cs b/ARMeilleure/Decoders/OpCodeSimdTbl.cs
new file mode 100644
index 000000000..14fdd6489
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdTbl.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+    // For this opcode, the Size decoded by the base class is replaced by the
+    // 2-bit field at bits 14:13 plus one, giving a value from 1 to 4.
+    class OpCodeSimdTbl : OpCodeSimdReg
+    {
+        public OpCodeSimdTbl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+            => Size = 1 + ((opCode >> 13) & 3);
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeSystem.cs b/ARMeilleure/Decoders/OpCodeSystem.cs
new file mode 100644
index 000000000..cf7c5cc15
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSystem.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.Decoders
+{
+    // Decoded system opcode: splits the encoding into its Op0/Op1/CRn/CRm/Op2/Rt fields.
+    class OpCodeSystem : OpCode
+    {
+        public int Rt  { get; private set; } // bits 0-4
+        public int Op2 { get; private set; } // bits 5-7
+        public int CRm { get; private set; } // bits 8-11
+        public int CRn { get; private set; } // bits 12-15
+        public int Op1 { get; private set; } // bits 16-18
+        public int Op0 { get; private set; } // bit 19 OR'ed with 2, so always 2 or 3
+        public OpCodeSystem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Op0 = 2 | ((opCode >> 19) & 1);
+            Op1 =     (opCode >> 16) & 7;
+            CRn =     (opCode >> 12) & 15;
+            CRm =     (opCode >>  8) & 15;
+            Op2 =     (opCode >>  5) & 7;
+            Rt  =      opCode        & 31;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeT16.cs b/ARMeilleure/Decoders/OpCodeT16.cs
new file mode 100644
index 000000000..e7b7aff53
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeT16.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+    // Base for 2-byte opcodes: the condition is fixed to Condition.Al and the size to 2 bytes.
+    class OpCodeT16 : OpCode32
+    {
+        public OpCodeT16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Cond              = Condition.Al;
+            OpCodeSizeInBytes = 2;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeT16AluImm8.cs b/ARMeilleure/Decoders/OpCodeT16AluImm8.cs
new file mode 100644
index 000000000..197d3b091
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeT16AluImm8.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.Decoders
+{
+    // ALU opcode with an 8-bit immediate; a single 3-bit register field backs both Rd and Rn.
+    class OpCodeT16AluImm8 : OpCodeT16, IOpCode32Alu
+    {
+        private int _rdn; // bits 8-10
+
+        public int  Rd       => _rdn;
+        public int  Rn       => _rdn;
+        public bool SetFlags => false;
+
+        public int Immediate { get; private set; } // bits 0-7
+
+        public OpCodeT16AluImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            _rdn      = (opCode >> 8) & 7;
+            Immediate =  opCode       & 255;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeT16BReg.cs b/ARMeilleure/Decoders/OpCodeT16BReg.cs
new file mode 100644
index 000000000..1fb397591
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeT16BReg.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.Decoders
+{
+    // Opcode exposing Rm, the 4-bit register field located at bits 3-6 of the encoding.
+    class OpCodeT16BReg : OpCodeT16, IOpCode32BReg
+    {
+        public int Rm { get; private set; }
+        public OpCodeT16BReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Rm = (opCode & 0x78) >> 3;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
new file mode 100644
index 000000000..22c762d62
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -0,0 +1,787 @@
+using ARMeilleure.Instructions;
+using ARMeilleure.State;
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.Decoders
+{
+    static class OpCodeTable
+    {
+        private const int FastLookupSize = 0x1000;
+
+        private struct InstInfo
+        {
+            public int Mask  { get; }
+            public int Value { get; }
+
+            public InstDescriptor Inst { get; }
+
+            public Type Type { get; }
+
+            public InstInfo(int mask, int value, InstDescriptor inst, Type type)
+            {
+                Mask  = mask;
+                Value = value;
+                Inst  = inst;
+                Type  = type;
+            }
+        }
+
+        private static List<InstInfo> _allInstA32 = new List<InstInfo>();
+        private static List<InstInfo> _allInstT32 = new List<InstInfo>();
+        private static List<InstInfo> _allInstA64 = new List<InstInfo>();
+
+        private static InstInfo[][] _instA32FastLookup = new InstInfo[FastLookupSize][];
+        private static InstInfo[][] _instT32FastLookup = new InstInfo[FastLookupSize][];
+        private static InstInfo[][] _instA64FastLookup = new InstInfo[FastLookupSize][];
+
+        static OpCodeTable()
+        {
+#region "OpCode Table (AArch64)"
+            // Base
+            SetA64("x0011010000xxxxx000000xxxxxxxxxx", InstName.Adc,             InstEmit.Adc,             typeof(OpCodeAluRs));
+            SetA64("x0111010000xxxxx000000xxxxxxxxxx", InstName.Adcs,            InstEmit.Adcs,            typeof(OpCodeAluRs));
+            SetA64("x00100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Add,             InstEmit.Add,             typeof(OpCodeAluImm));
+            SetA64("00001011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Add,             InstEmit.Add,             typeof(OpCodeAluRs));
+            SetA64("10001011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Add,             InstEmit.Add,             typeof(OpCodeAluRs));
+            SetA64("x0001011001xxxxxxxx0xxxxxxxxxxxx", InstName.Add,             InstEmit.Add,             typeof(OpCodeAluRx));
+            SetA64("x0001011001xxxxxxxx100xxxxxxxxxx", InstName.Add,             InstEmit.Add,             typeof(OpCodeAluRx));
+            SetA64("x01100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Adds,            InstEmit.Adds,            typeof(OpCodeAluImm));
+            SetA64("00101011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Adds,            InstEmit.Adds,            typeof(OpCodeAluRs));
+            SetA64("10101011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Adds,            InstEmit.Adds,            typeof(OpCodeAluRs));
+            SetA64("x0101011001xxxxxxxx0xxxxxxxxxxxx", InstName.Adds,            InstEmit.Adds,            typeof(OpCodeAluRx));
+            SetA64("x0101011001xxxxxxxx100xxxxxxxxxx", InstName.Adds,            InstEmit.Adds,            typeof(OpCodeAluRx));
+            SetA64("0xx10000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Adr,             InstEmit.Adr,             typeof(OpCodeAdr));
+            SetA64("1xx10000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Adrp,            InstEmit.Adrp,            typeof(OpCodeAdr));
+            SetA64("0001001000xxxxxxxxxxxxxxxxxxxxxx", InstName.And,             InstEmit.And,             typeof(OpCodeAluImm));
+            SetA64("100100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.And,             InstEmit.And,             typeof(OpCodeAluImm));
+            SetA64("00001010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.And,             InstEmit.And,             typeof(OpCodeAluRs));
+            SetA64("10001010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.And,             InstEmit.And,             typeof(OpCodeAluRs));
+            SetA64("0111001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Ands,            InstEmit.Ands,            typeof(OpCodeAluImm));
+            SetA64("111100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Ands,            InstEmit.Ands,            typeof(OpCodeAluImm));
+            SetA64("01101010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Ands,            InstEmit.Ands,            typeof(OpCodeAluRs));
+            SetA64("11101010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Ands,            InstEmit.Ands,            typeof(OpCodeAluRs));
+            SetA64("x0011010110xxxxx001010xxxxxxxxxx", InstName.Asrv,            InstEmit.Asrv,            typeof(OpCodeAluRs));
+            SetA64("000101xxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.B,               InstEmit.B,               typeof(OpCodeBImmAl));
+            SetA64("01010100xxxxxxxxxxxxxxxxxxx0xxxx", InstName.B_Cond,          InstEmit.B_Cond,          typeof(OpCodeBImmCond));
+            SetA64("00110011000xxxxx0xxxxxxxxxxxxxxx", InstName.Bfm,             InstEmit.Bfm,             typeof(OpCodeBfm));
+            SetA64("1011001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Bfm,             InstEmit.Bfm,             typeof(OpCodeBfm));
+            SetA64("00001010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Bic,             InstEmit.Bic,             typeof(OpCodeAluRs));
+            SetA64("10001010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Bic,             InstEmit.Bic,             typeof(OpCodeAluRs));
+            SetA64("01101010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Bics,            InstEmit.Bics,            typeof(OpCodeAluRs));
+            SetA64("11101010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Bics,            InstEmit.Bics,            typeof(OpCodeAluRs));
+            SetA64("100101xxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl,              InstEmit.Bl,              typeof(OpCodeBImmAl));
+            SetA64("1101011000111111000000xxxxx00000", InstName.Blr,             InstEmit.Blr,             typeof(OpCodeBReg));
+            SetA64("1101011000011111000000xxxxx00000", InstName.Br,              InstEmit.Br,              typeof(OpCodeBReg));
+            SetA64("11010100001xxxxxxxxxxxxxxxx00000", InstName.Brk,             InstEmit.Brk,             typeof(OpCodeException));
+            SetA64("x0110101xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cbnz,            InstEmit.Cbnz,            typeof(OpCodeBImmCmp));
+            SetA64("x0110100xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cbz,             InstEmit.Cbz,             typeof(OpCodeBImmCmp));
+            SetA64("x0111010010xxxxxxxxx10xxxxx0xxxx", InstName.Ccmn,            InstEmit.Ccmn,            typeof(OpCodeCcmpImm));
+            SetA64("x0111010010xxxxxxxxx00xxxxx0xxxx", InstName.Ccmn,            InstEmit.Ccmn,            typeof(OpCodeCcmpReg));
+            SetA64("x1111010010xxxxxxxxx10xxxxx0xxxx", InstName.Ccmp,            InstEmit.Ccmp,            typeof(OpCodeCcmpImm));
+            SetA64("x1111010010xxxxxxxxx00xxxxx0xxxx", InstName.Ccmp,            InstEmit.Ccmp,            typeof(OpCodeCcmpReg));
+            SetA64("11010101000000110011xxxx01011111", InstName.Clrex,           InstEmit.Clrex,           typeof(OpCodeSystem));
+            SetA64("x101101011000000000101xxxxxxxxxx", InstName.Cls,             InstEmit.Cls,             typeof(OpCodeAlu));
+            SetA64("x101101011000000000100xxxxxxxxxx", InstName.Clz,             InstEmit.Clz,             typeof(OpCodeAlu));
+            SetA64("00011010110xxxxx010000xxxxxxxxxx", InstName.Crc32b,          InstEmit.Crc32b,          typeof(OpCodeAluBinary));
+            SetA64("00011010110xxxxx010001xxxxxxxxxx", InstName.Crc32h,          InstEmit.Crc32h,          typeof(OpCodeAluBinary));
+            SetA64("00011010110xxxxx010010xxxxxxxxxx", InstName.Crc32w,          InstEmit.Crc32w,          typeof(OpCodeAluBinary));
+            SetA64("10011010110xxxxx010011xxxxxxxxxx", InstName.Crc32x,          InstEmit.Crc32x,          typeof(OpCodeAluBinary));
+            SetA64("00011010110xxxxx010100xxxxxxxxxx", InstName.Crc32cb,         InstEmit.Crc32cb,         typeof(OpCodeAluBinary));
+            SetA64("00011010110xxxxx010101xxxxxxxxxx", InstName.Crc32ch,         InstEmit.Crc32ch,         typeof(OpCodeAluBinary));
+            SetA64("00011010110xxxxx010110xxxxxxxxxx", InstName.Crc32cw,         InstEmit.Crc32cw,         typeof(OpCodeAluBinary));
+            SetA64("10011010110xxxxx010111xxxxxxxxxx", InstName.Crc32cx,         InstEmit.Crc32cx,         typeof(OpCodeAluBinary));
+            SetA64("x0011010100xxxxxxxxx00xxxxxxxxxx", InstName.Csel,            InstEmit.Csel,            typeof(OpCodeCsel));
+            SetA64("x0011010100xxxxxxxxx01xxxxxxxxxx", InstName.Csinc,           InstEmit.Csinc,           typeof(OpCodeCsel));
+            SetA64("x1011010100xxxxxxxxx00xxxxxxxxxx", InstName.Csinv,           InstEmit.Csinv,           typeof(OpCodeCsel));
+            SetA64("x1011010100xxxxxxxxx01xxxxxxxxxx", InstName.Csneg,           InstEmit.Csneg,           typeof(OpCodeCsel));
+            SetA64("11010101000000110011xxxx10111111", InstName.Dmb,             InstEmit.Dmb,             typeof(OpCodeSystem));
+            SetA64("11010101000000110011xxxx10011111", InstName.Dsb,             InstEmit.Dsb,             typeof(OpCodeSystem));
+            SetA64("01001010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Eon,             InstEmit.Eon,             typeof(OpCodeAluRs));
+            SetA64("11001010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Eon,             InstEmit.Eon,             typeof(OpCodeAluRs));
+            SetA64("0101001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Eor,             InstEmit.Eor,             typeof(OpCodeAluImm));
+            SetA64("110100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Eor,             InstEmit.Eor,             typeof(OpCodeAluImm));
+            SetA64("01001010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Eor,             InstEmit.Eor,             typeof(OpCodeAluRs));
+            SetA64("11001010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Eor,             InstEmit.Eor,             typeof(OpCodeAluRs));
+            SetA64("00010011100xxxxx0xxxxxxxxxxxxxxx", InstName.Extr,            InstEmit.Extr,            typeof(OpCodeAluRs));
+            SetA64("10010011110xxxxxxxxxxxxxxxxxxxxx", InstName.Extr,            InstEmit.Extr,            typeof(OpCodeAluRs));
+            SetA64("11010101000000110010xxxxxxx11111", InstName.Hint,            InstEmit.Hint,            typeof(OpCodeSystem));
+            SetA64("11010101000000110011xxxx11011111", InstName.Isb,             InstEmit.Isb,             typeof(OpCodeSystem));
+            SetA64("xx001000110xxxxx1xxxxxxxxxxxxxxx", InstName.Ldar,            InstEmit.Ldar,            typeof(OpCodeMemEx));
+            SetA64("1x001000011xxxxx1xxxxxxxxxxxxxxx", InstName.Ldaxp,           InstEmit.Ldaxp,           typeof(OpCodeMemEx));
+            SetA64("xx001000010xxxxx1xxxxxxxxxxxxxxx", InstName.Ldaxr,           InstEmit.Ldaxr,           typeof(OpCodeMemEx));
+            SetA64("<<10100xx1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldp,             InstEmit.Ldp,             typeof(OpCodeMemPair));
+            SetA64("xx111000010xxxxxxxxxxxxxxxxxxxxx", InstName.Ldr,             InstEmit.Ldr,             typeof(OpCodeMemImm));
+            SetA64("xx11100101xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr,             InstEmit.Ldr,             typeof(OpCodeMemImm));
+            SetA64("xx111000011xxxxxxxxx10xxxxxxxxxx", InstName.Ldr,             InstEmit.Ldr,             typeof(OpCodeMemReg));
+            SetA64("xx011000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr_Literal,     InstEmit.Ldr_Literal,     typeof(OpCodeMemLit));
+            SetA64("0x1110001x0xxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs,            InstEmit.Ldrs,            typeof(OpCodeMemImm));
+            SetA64("0x1110011xxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs,            InstEmit.Ldrs,            typeof(OpCodeMemImm));
+            SetA64("10111000100xxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs,            InstEmit.Ldrs,            typeof(OpCodeMemImm));
+            SetA64("1011100110xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs,            InstEmit.Ldrs,            typeof(OpCodeMemImm));
+            SetA64("0x1110001x1xxxxxxxxx10xxxxxxxxxx", InstName.Ldrs,            InstEmit.Ldrs,            typeof(OpCodeMemReg));
+            SetA64("10111000101xxxxxxxxx10xxxxxxxxxx", InstName.Ldrs,            InstEmit.Ldrs,            typeof(OpCodeMemReg));
+            SetA64("xx001000010xxxxx0xxxxxxxxxxxxxxx", InstName.Ldxr,            InstEmit.Ldxr,            typeof(OpCodeMemEx));
+            SetA64("1x001000011xxxxx0xxxxxxxxxxxxxxx", InstName.Ldxp,            InstEmit.Ldxp,            typeof(OpCodeMemEx));
+            SetA64("x0011010110xxxxx001000xxxxxxxxxx", InstName.Lslv,            InstEmit.Lslv,            typeof(OpCodeAluRs));
+            SetA64("x0011010110xxxxx001001xxxxxxxxxx", InstName.Lsrv,            InstEmit.Lsrv,            typeof(OpCodeAluRs));
+            SetA64("x0011011000xxxxx0xxxxxxxxxxxxxxx", InstName.Madd,            InstEmit.Madd,            typeof(OpCodeMul));
+            SetA64("0111001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movk,            InstEmit.Movk,            typeof(OpCodeMov));
+            SetA64("111100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movk,            InstEmit.Movk,            typeof(OpCodeMov));
+            SetA64("0001001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movn,            InstEmit.Movn,            typeof(OpCodeMov));
+            SetA64("100100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movn,            InstEmit.Movn,            typeof(OpCodeMov));
+            SetA64("0101001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movz,            InstEmit.Movz,            typeof(OpCodeMov));
+            SetA64("110100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movz,            InstEmit.Movz,            typeof(OpCodeMov));
+            SetA64("110101010011xxxxxxxxxxxxxxxxxxxx", InstName.Mrs,             InstEmit.Mrs,             typeof(OpCodeSystem));
+            SetA64("110101010001xxxxxxxxxxxxxxxxxxxx", InstName.Msr,             InstEmit.Msr,             typeof(OpCodeSystem));
+            SetA64("x0011011000xxxxx1xxxxxxxxxxxxxxx", InstName.Msub,            InstEmit.Msub,            typeof(OpCodeMul));
+            SetA64("11010101000000110010000000011111", InstName.Nop,             InstEmit.Nop,             typeof(OpCodeSystem));
+            SetA64("00101010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Orn,             InstEmit.Orn,             typeof(OpCodeAluRs));
+            SetA64("10101010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Orn,             InstEmit.Orn,             typeof(OpCodeAluRs));
+            SetA64("0011001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Orr,             InstEmit.Orr,             typeof(OpCodeAluImm));
+            SetA64("101100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Orr,             InstEmit.Orr,             typeof(OpCodeAluImm));
+            SetA64("00101010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Orr,             InstEmit.Orr,             typeof(OpCodeAluRs));
+            SetA64("10101010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Orr,             InstEmit.Orr,             typeof(OpCodeAluRs));
+            SetA64("1111100110xxxxxxxxxxxxxxxxxxxxxx", InstName.Pfrm,            InstEmit.Pfrm,            typeof(OpCodeMemImm));
+            SetA64("11111000100xxxxxxxxx00xxxxxxxxxx", InstName.Pfrm,            InstEmit.Pfrm,            typeof(OpCodeMemImm));
+            SetA64("11011000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pfrm,            InstEmit.Pfrm,            typeof(OpCodeMemLit));
+            SetA64("x101101011000000000000xxxxxxxxxx", InstName.Rbit,            InstEmit.Rbit,            typeof(OpCodeAlu));
+            SetA64("1101011001011111000000xxxxx00000", InstName.Ret,             InstEmit.Ret,             typeof(OpCodeBReg));
+            SetA64("x101101011000000000001xxxxxxxxxx", InstName.Rev16,           InstEmit.Rev16,           typeof(OpCodeAlu));
+            SetA64("x101101011000000000010xxxxxxxxxx", InstName.Rev32,           InstEmit.Rev32,           typeof(OpCodeAlu));
+            SetA64("1101101011000000000011xxxxxxxxxx", InstName.Rev64,           InstEmit.Rev64,           typeof(OpCodeAlu));
+            SetA64("x0011010110xxxxx001011xxxxxxxxxx", InstName.Rorv,            InstEmit.Rorv,            typeof(OpCodeAluRs));
+            SetA64("x1011010000xxxxx000000xxxxxxxxxx", InstName.Sbc,             InstEmit.Sbc,             typeof(OpCodeAluRs));
+            SetA64("x1111010000xxxxx000000xxxxxxxxxx", InstName.Sbcs,            InstEmit.Sbcs,            typeof(OpCodeAluRs));
+            SetA64("00010011000xxxxx0xxxxxxxxxxxxxxx", InstName.Sbfm,            InstEmit.Sbfm,            typeof(OpCodeBfm));
+            SetA64("1001001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Sbfm,            InstEmit.Sbfm,            typeof(OpCodeBfm));
+            SetA64("x0011010110xxxxx000011xxxxxxxxxx", InstName.Sdiv,            InstEmit.Sdiv,            typeof(OpCodeAluBinary));
+            SetA64("10011011001xxxxx0xxxxxxxxxxxxxxx", InstName.Smaddl,          InstEmit.Smaddl,          typeof(OpCodeMul));
+            SetA64("10011011001xxxxx1xxxxxxxxxxxxxxx", InstName.Smsubl,          InstEmit.Smsubl,          typeof(OpCodeMul));
+            SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smulh,           InstEmit.Smulh,           typeof(OpCodeMul));
+            SetA64("xx001000100xxxxx1xxxxxxxxxxxxxxx", InstName.Stlr,            InstEmit.Stlr,            typeof(OpCodeMemEx));
+            SetA64("1x001000001xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxp,           InstEmit.Stlxp,           typeof(OpCodeMemEx));
+            SetA64("xx001000000xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxr,           InstEmit.Stlxr,           typeof(OpCodeMemEx));
+            SetA64("x010100xx0xxxxxxxxxxxxxxxxxxxxxx", InstName.Stp,             InstEmit.Stp,             typeof(OpCodeMemPair));
+            SetA64("xx111000000xxxxxxxxxxxxxxxxxxxxx", InstName.Str,             InstEmit.Str,             typeof(OpCodeMemImm));
+            SetA64("xx11100100xxxxxxxxxxxxxxxxxxxxxx", InstName.Str,             InstEmit.Str,             typeof(OpCodeMemImm));
+            SetA64("xx111000001xxxxxxxxx10xxxxxxxxxx", InstName.Str,             InstEmit.Str,             typeof(OpCodeMemReg));
+            SetA64("1x001000001xxxxx0xxxxxxxxxxxxxxx", InstName.Stxp,            InstEmit.Stxp,            typeof(OpCodeMemEx));
+            SetA64("xx001000000xxxxx0xxxxxxxxxxxxxxx", InstName.Stxr,            InstEmit.Stxr,            typeof(OpCodeMemEx));
+            SetA64("x10100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Sub,             InstEmit.Sub,             typeof(OpCodeAluImm));
+            SetA64("01001011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Sub,             InstEmit.Sub,             typeof(OpCodeAluRs));
+            SetA64("11001011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Sub,             InstEmit.Sub,             typeof(OpCodeAluRs));
+            SetA64("x1001011001xxxxxxxx0xxxxxxxxxxxx", InstName.Sub,             InstEmit.Sub,             typeof(OpCodeAluRx));
+            SetA64("x1001011001xxxxxxxx100xxxxxxxxxx", InstName.Sub,             InstEmit.Sub,             typeof(OpCodeAluRx));
+            SetA64("x11100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Subs,            InstEmit.Subs,            typeof(OpCodeAluImm));
+            SetA64("01101011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Subs,            InstEmit.Subs,            typeof(OpCodeAluRs));
+            SetA64("11101011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Subs,            InstEmit.Subs,            typeof(OpCodeAluRs));
+            SetA64("x1101011001xxxxxxxx0xxxxxxxxxxxx", InstName.Subs,            InstEmit.Subs,            typeof(OpCodeAluRx));
+            SetA64("x1101011001xxxxxxxx100xxxxxxxxxx", InstName.Subs,            InstEmit.Subs,            typeof(OpCodeAluRx));
+            SetA64("11010100000xxxxxxxxxxxxxxxx00001", InstName.Svc,             InstEmit.Svc,             typeof(OpCodeException));
+            SetA64("1101010100001xxxxxxxxxxxxxxxxxxx", InstName.Sys,             InstEmit.Sys,             typeof(OpCodeSystem));
+            SetA64("x0110111xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tbnz,            InstEmit.Tbnz,            typeof(OpCodeBImmTest));
+            SetA64("x0110110xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tbz,             InstEmit.Tbz,             typeof(OpCodeBImmTest));
+            SetA64("01010011000xxxxx0xxxxxxxxxxxxxxx", InstName.Ubfm,            InstEmit.Ubfm,            typeof(OpCodeBfm));
+            SetA64("1101001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Ubfm,            InstEmit.Ubfm,            typeof(OpCodeBfm));
+            SetA64("x0011010110xxxxx000010xxxxxxxxxx", InstName.Udiv,            InstEmit.Udiv,            typeof(OpCodeAluBinary));
+            SetA64("10011011101xxxxx0xxxxxxxxxxxxxxx", InstName.Umaddl,          InstEmit.Umaddl,          typeof(OpCodeMul));
+            SetA64("10011011101xxxxx1xxxxxxxxxxxxxxx", InstName.Umsubl,          InstEmit.Umsubl,          typeof(OpCodeMul));
+            SetA64("10011011110xxxxx0xxxxxxxxxxxxxxx", InstName.Umulh,           InstEmit.Umulh,           typeof(OpCodeMul));
+
+            // FP & SIMD
+            SetA64("0101111011100000101110xxxxxxxxxx", InstName.Abs_S,           InstEmit.Abs_S,           typeof(OpCodeSimd));
+            SetA64("0>001110<<100000101110xxxxxxxxxx", InstName.Abs_V,           InstEmit.Abs_V,           typeof(OpCodeSimd));
+            SetA64("01011110111xxxxx100001xxxxxxxxxx", InstName.Add_S,           InstEmit.Add_S,           typeof(OpCodeSimdReg));
+            SetA64("0>001110<<1xxxxx100001xxxxxxxxxx", InstName.Add_V,           InstEmit.Add_V,           typeof(OpCodeSimdReg));
+            SetA64("0x001110<<1xxxxx010000xxxxxxxxxx", InstName.Addhn_V,         InstEmit.Addhn_V,         typeof(OpCodeSimdReg));
+            SetA64("0101111011110001101110xxxxxxxxxx", InstName.Addp_S,          InstEmit.Addp_S,          typeof(OpCodeSimd));
+            SetA64("0>001110<<1xxxxx101111xxxxxxxxxx", InstName.Addp_V,          InstEmit.Addp_V,          typeof(OpCodeSimdReg));
+            SetA64("000011100x110001101110xxxxxxxxxx", InstName.Addv_V,          InstEmit.Addv_V,          typeof(OpCodeSimd));
+            SetA64("01001110<<110001101110xxxxxxxxxx", InstName.Addv_V,          InstEmit.Addv_V,          typeof(OpCodeSimd));
+            SetA64("0100111000101000010110xxxxxxxxxx", InstName.Aesd_V,          InstEmit.Aesd_V,          typeof(OpCodeSimd));
+            SetA64("0100111000101000010010xxxxxxxxxx", InstName.Aese_V,          InstEmit.Aese_V,          typeof(OpCodeSimd));
+            SetA64("0100111000101000011110xxxxxxxxxx", InstName.Aesimc_V,        InstEmit.Aesimc_V,        typeof(OpCodeSimd));
+            SetA64("0100111000101000011010xxxxxxxxxx", InstName.Aesmc_V,         InstEmit.Aesmc_V,         typeof(OpCodeSimd));
+            SetA64("0x001110001xxxxx000111xxxxxxxxxx", InstName.And_V,           InstEmit.And_V,           typeof(OpCodeSimdReg));
+            SetA64("0x001110011xxxxx000111xxxxxxxxxx", InstName.Bic_V,           InstEmit.Bic_V,           typeof(OpCodeSimdReg));
+            SetA64("0x10111100000xxx0xx101xxxxxxxxxx", InstName.Bic_Vi,          InstEmit.Bic_Vi,          typeof(OpCodeSimdImm));
+            SetA64("0x10111100000xxx10x101xxxxxxxxxx", InstName.Bic_Vi,          InstEmit.Bic_Vi,          typeof(OpCodeSimdImm));
+            SetA64("0x101110111xxxxx000111xxxxxxxxxx", InstName.Bif_V,           InstEmit.Bif_V,           typeof(OpCodeSimdReg));
+            SetA64("0x101110101xxxxx000111xxxxxxxxxx", InstName.Bit_V,           InstEmit.Bit_V,           typeof(OpCodeSimdReg));
+            SetA64("0x101110011xxxxx000111xxxxxxxxxx", InstName.Bsl_V,           InstEmit.Bsl_V,           typeof(OpCodeSimdReg));
+            SetA64("0x001110<<100000010010xxxxxxxxxx", InstName.Cls_V,           InstEmit.Cls_V,           typeof(OpCodeSimd));
+            SetA64("0x101110<<100000010010xxxxxxxxxx", InstName.Clz_V,           InstEmit.Clz_V,           typeof(OpCodeSimd));
+            SetA64("01111110111xxxxx100011xxxxxxxxxx", InstName.Cmeq_S,          InstEmit.Cmeq_S,          typeof(OpCodeSimdReg));
+            SetA64("0101111011100000100110xxxxxxxxxx", InstName.Cmeq_S,          InstEmit.Cmeq_S,          typeof(OpCodeSimd));
+            SetA64("0>101110<<1xxxxx100011xxxxxxxxxx", InstName.Cmeq_V,          InstEmit.Cmeq_V,          typeof(OpCodeSimdReg));
+            SetA64("0>001110<<100000100110xxxxxxxxxx", InstName.Cmeq_V,          InstEmit.Cmeq_V,          typeof(OpCodeSimd));
+            SetA64("01011110111xxxxx001111xxxxxxxxxx", InstName.Cmge_S,          InstEmit.Cmge_S,          typeof(OpCodeSimdReg));
+            SetA64("0111111011100000100010xxxxxxxxxx", InstName.Cmge_S,          InstEmit.Cmge_S,          typeof(OpCodeSimd));
+            SetA64("0>001110<<1xxxxx001111xxxxxxxxxx", InstName.Cmge_V,          InstEmit.Cmge_V,          typeof(OpCodeSimdReg));
+            SetA64("0>101110<<100000100010xxxxxxxxxx", InstName.Cmge_V,          InstEmit.Cmge_V,          typeof(OpCodeSimd));
+            SetA64("01011110111xxxxx001101xxxxxxxxxx", InstName.Cmgt_S,          InstEmit.Cmgt_S,          typeof(OpCodeSimdReg));
+            SetA64("0101111011100000100010xxxxxxxxxx", InstName.Cmgt_S,          InstEmit.Cmgt_S,          typeof(OpCodeSimd));
+            SetA64("0>001110<<1xxxxx001101xxxxxxxxxx", InstName.Cmgt_V,          InstEmit.Cmgt_V,          typeof(OpCodeSimdReg));
+            SetA64("0>001110<<100000100010xxxxxxxxxx", InstName.Cmgt_V,          InstEmit.Cmgt_V,          typeof(OpCodeSimd));
+            SetA64("01111110111xxxxx001101xxxxxxxxxx", InstName.Cmhi_S,          InstEmit.Cmhi_S,          typeof(OpCodeSimdReg));
+            SetA64("0>101110<<1xxxxx001101xxxxxxxxxx", InstName.Cmhi_V,          InstEmit.Cmhi_V,          typeof(OpCodeSimdReg));
+            SetA64("01111110111xxxxx001111xxxxxxxxxx", InstName.Cmhs_S,          InstEmit.Cmhs_S,          typeof(OpCodeSimdReg));
+            SetA64("0>101110<<1xxxxx001111xxxxxxxxxx", InstName.Cmhs_V,          InstEmit.Cmhs_V,          typeof(OpCodeSimdReg));
+            SetA64("0111111011100000100110xxxxxxxxxx", InstName.Cmle_S,          InstEmit.Cmle_S,          typeof(OpCodeSimd));
+            SetA64("0>101110<<100000100110xxxxxxxxxx", InstName.Cmle_V,          InstEmit.Cmle_V,          typeof(OpCodeSimd));
+            SetA64("0101111011100000101010xxxxxxxxxx", InstName.Cmlt_S,          InstEmit.Cmlt_S,          typeof(OpCodeSimd));
+            SetA64("0>001110<<100000101010xxxxxxxxxx", InstName.Cmlt_V,          InstEmit.Cmlt_V,          typeof(OpCodeSimd));
+            SetA64("01011110111xxxxx100011xxxxxxxxxx", InstName.Cmtst_S,         InstEmit.Cmtst_S,         typeof(OpCodeSimdReg));
+            SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstName.Cmtst_V,         InstEmit.Cmtst_V,         typeof(OpCodeSimdReg));
+            SetA64("0x00111000100000010110xxxxxxxxxx", InstName.Cnt_V,           InstEmit.Cnt_V,           typeof(OpCodeSimd));
+            SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstName.Dup_Gp,          InstEmit.Dup_Gp,          typeof(OpCodeSimdIns));
+            SetA64("01011110000xxxxx000001xxxxxxxxxx", InstName.Dup_S,           InstEmit.Dup_S,           typeof(OpCodeSimdIns));
+            SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstName.Dup_V,           InstEmit.Dup_V,           typeof(OpCodeSimdIns));
+            SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstName.Eor_V,           InstEmit.Eor_V,           typeof(OpCodeSimdReg));
+            SetA64("0>101110000xxxxx0<xxx0xxxxxxxxxx", InstName.Ext_V,           InstEmit.Ext_V,           typeof(OpCodeSimdExt));
+            SetA64("011111101x1xxxxx110101xxxxxxxxxx", InstName.Fabd_S,          InstEmit.Fabd_S,          typeof(OpCodeSimdReg));
+            SetA64("0>1011101<1xxxxx110101xxxxxxxxxx", InstName.Fabd_V,          InstEmit.Fabd_V,          typeof(OpCodeSimdReg));
+            SetA64("000111100x100000110000xxxxxxxxxx", InstName.Fabs_S,          InstEmit.Fabs_S,          typeof(OpCodeSimd));
+            SetA64("0>0011101<100000111110xxxxxxxxxx", InstName.Fabs_V,          InstEmit.Fabs_V,          typeof(OpCodeSimd));
+            SetA64("000111100x1xxxxx001010xxxxxxxxxx", InstName.Fadd_S,          InstEmit.Fadd_S,          typeof(OpCodeSimdReg));
+            SetA64("0>0011100<1xxxxx110101xxxxxxxxxx", InstName.Fadd_V,          InstEmit.Fadd_V,          typeof(OpCodeSimdReg));
+            SetA64("011111100x110000110110xxxxxxxxxx", InstName.Faddp_S,         InstEmit.Faddp_S,         typeof(OpCodeSimd));
+            SetA64("0>1011100<1xxxxx110101xxxxxxxxxx", InstName.Faddp_V,         InstEmit.Faddp_V,         typeof(OpCodeSimdReg));
+            SetA64("000111100x1xxxxxxxxx01xxxxx0xxxx", InstName.Fccmp_S,         InstEmit.Fccmp_S,         typeof(OpCodeSimdFcond));
+            SetA64("000111100x1xxxxxxxxx01xxxxx1xxxx", InstName.Fccmpe_S,        InstEmit.Fccmpe_S,        typeof(OpCodeSimdFcond));
+            SetA64("010111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_S,         InstEmit.Fcmeq_S,         typeof(OpCodeSimdReg));
+            SetA64("010111101x100000110110xxxxxxxxxx", InstName.Fcmeq_S,         InstEmit.Fcmeq_S,         typeof(OpCodeSimd));
+            SetA64("0>0011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_V,         InstEmit.Fcmeq_V,         typeof(OpCodeSimdReg));
+            SetA64("0>0011101<100000110110xxxxxxxxxx", InstName.Fcmeq_V,         InstEmit.Fcmeq_V,         typeof(OpCodeSimd));
+            SetA64("011111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmge_S,         InstEmit.Fcmge_S,         typeof(OpCodeSimdReg));
+            SetA64("011111101x100000110010xxxxxxxxxx", InstName.Fcmge_S,         InstEmit.Fcmge_S,         typeof(OpCodeSimd));
+            SetA64("0>1011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmge_V,         InstEmit.Fcmge_V,         typeof(OpCodeSimdReg));
+            SetA64("0>1011101<100000110010xxxxxxxxxx", InstName.Fcmge_V,         InstEmit.Fcmge_V,         typeof(OpCodeSimd));
+            SetA64("011111101x1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_S,         InstEmit.Fcmgt_S,         typeof(OpCodeSimdReg));
+            SetA64("010111101x100000110010xxxxxxxxxx", InstName.Fcmgt_S,         InstEmit.Fcmgt_S,         typeof(OpCodeSimd));
+            SetA64("0>1011101<1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_V,         InstEmit.Fcmgt_V,         typeof(OpCodeSimdReg));
+            SetA64("0>0011101<100000110010xxxxxxxxxx", InstName.Fcmgt_V,         InstEmit.Fcmgt_V,         typeof(OpCodeSimd));
+            SetA64("011111101x100000110110xxxxxxxxxx", InstName.Fcmle_S,         InstEmit.Fcmle_S,         typeof(OpCodeSimd));
+            SetA64("0>1011101<100000110110xxxxxxxxxx", InstName.Fcmle_V,         InstEmit.Fcmle_V,         typeof(OpCodeSimd));
+            SetA64("010111101x100000111010xxxxxxxxxx", InstName.Fcmlt_S,         InstEmit.Fcmlt_S,         typeof(OpCodeSimd));
+            SetA64("0>0011101<100000111010xxxxxxxxxx", InstName.Fcmlt_V,         InstEmit.Fcmlt_V,         typeof(OpCodeSimd));
+            SetA64("000111100x1xxxxx001000xxxxx0x000", InstName.Fcmp_S,          InstEmit.Fcmp_S,          typeof(OpCodeSimdReg));
+            SetA64("000111100x1xxxxx001000xxxxx1x000", InstName.Fcmpe_S,         InstEmit.Fcmpe_S,         typeof(OpCodeSimdReg));
+            SetA64("000111100x1xxxxxxxxx11xxxxxxxxxx", InstName.Fcsel_S,         InstEmit.Fcsel_S,         typeof(OpCodeSimdFcond));
+            SetA64("00011110xx10001xx10000xxxxxxxxxx", InstName.Fcvt_S,          InstEmit.Fcvt_S,          typeof(OpCodeSimd));
+            SetA64("x00111100x100100000000xxxxxxxxxx", InstName.Fcvtas_Gp,       InstEmit.Fcvtas_Gp,       typeof(OpCodeSimdCvt));
+            SetA64("x00111100x100101000000xxxxxxxxxx", InstName.Fcvtau_Gp,       InstEmit.Fcvtau_Gp,       typeof(OpCodeSimdCvt));
+            SetA64("0x0011100x100001011110xxxxxxxxxx", InstName.Fcvtl_V,         InstEmit.Fcvtl_V,         typeof(OpCodeSimd));
+            SetA64("x00111100x110000000000xxxxxxxxxx", InstName.Fcvtms_Gp,       InstEmit.Fcvtms_Gp,       typeof(OpCodeSimdCvt));
+            SetA64("x00111100x110001000000xxxxxxxxxx", InstName.Fcvtmu_Gp,       InstEmit.Fcvtmu_Gp,       typeof(OpCodeSimdCvt));
+            SetA64("0x0011100x100001011010xxxxxxxxxx", InstName.Fcvtn_V,         InstEmit.Fcvtn_V,         typeof(OpCodeSimd));
+            SetA64("010111100x100001101010xxxxxxxxxx", InstName.Fcvtns_S,        InstEmit.Fcvtns_S,        typeof(OpCodeSimd));
+            SetA64("0>0011100<100001101010xxxxxxxxxx", InstName.Fcvtns_V,        InstEmit.Fcvtns_V,        typeof(OpCodeSimd));
+            SetA64("011111100x100001101010xxxxxxxxxx", InstName.Fcvtnu_S,        InstEmit.Fcvtnu_S,        typeof(OpCodeSimd));
+            SetA64("0>1011100<100001101010xxxxxxxxxx", InstName.Fcvtnu_V,        InstEmit.Fcvtnu_V,        typeof(OpCodeSimd));
+            SetA64("x00111100x101000000000xxxxxxxxxx", InstName.Fcvtps_Gp,       InstEmit.Fcvtps_Gp,       typeof(OpCodeSimdCvt));
+            SetA64("x00111100x101001000000xxxxxxxxxx", InstName.Fcvtpu_Gp,       InstEmit.Fcvtpu_Gp,       typeof(OpCodeSimdCvt));
+            SetA64("x00111100x111000000000xxxxxxxxxx", InstName.Fcvtzs_Gp,       InstEmit.Fcvtzs_Gp,       typeof(OpCodeSimdCvt));
+            SetA64(">00111100x011000>xxxxxxxxxxxxxxx", InstName.Fcvtzs_Gp_Fixed, InstEmit.Fcvtzs_Gp_Fixed, typeof(OpCodeSimdCvt));
+            SetA64("010111101x100001101110xxxxxxxxxx", InstName.Fcvtzs_S,        InstEmit.Fcvtzs_S,        typeof(OpCodeSimd));
+            SetA64("0>0011101<100001101110xxxxxxxxxx", InstName.Fcvtzs_V,        InstEmit.Fcvtzs_V,        typeof(OpCodeSimd));
+            SetA64("0x001111001xxxxx111111xxxxxxxxxx", InstName.Fcvtzs_V_Fixed,  InstEmit.Fcvtzs_V_Fixed,  typeof(OpCodeSimdShImm));
+            SetA64("0100111101xxxxxx111111xxxxxxxxxx", InstName.Fcvtzs_V_Fixed,  InstEmit.Fcvtzs_V_Fixed,  typeof(OpCodeSimdShImm));
+            SetA64("x00111100x111001000000xxxxxxxxxx", InstName.Fcvtzu_Gp,       InstEmit.Fcvtzu_Gp,       typeof(OpCodeSimdCvt));
+            SetA64(">00111100x011001>xxxxxxxxxxxxxxx", InstName.Fcvtzu_Gp_Fixed, InstEmit.Fcvtzu_Gp_Fixed, typeof(OpCodeSimdCvt));
+            SetA64("011111101x100001101110xxxxxxxxxx", InstName.Fcvtzu_S,        InstEmit.Fcvtzu_S,        typeof(OpCodeSimd));
+            SetA64("0>1011101<100001101110xxxxxxxxxx", InstName.Fcvtzu_V,        InstEmit.Fcvtzu_V,        typeof(OpCodeSimd));
+            SetA64("0x101111001xxxxx111111xxxxxxxxxx", InstName.Fcvtzu_V_Fixed,  InstEmit.Fcvtzu_V_Fixed,  typeof(OpCodeSimdShImm));
+            SetA64("0110111101xxxxxx111111xxxxxxxxxx", InstName.Fcvtzu_V_Fixed,  InstEmit.Fcvtzu_V_Fixed,  typeof(OpCodeSimdShImm));
+            SetA64("000111100x1xxxxx000110xxxxxxxxxx", InstName.Fdiv_S,          InstEmit.Fdiv_S,          typeof(OpCodeSimdReg));
+            SetA64("0>1011100<1xxxxx111111xxxxxxxxxx", InstName.Fdiv_V,          InstEmit.Fdiv_V,          typeof(OpCodeSimdReg));
+            SetA64("000111110x0xxxxx0xxxxxxxxxxxxxxx", InstName.Fmadd_S,         InstEmit.Fmadd_S,         typeof(OpCodeSimdReg));
+            SetA64("000111100x1xxxxx010010xxxxxxxxxx", InstName.Fmax_S,          InstEmit.Fmax_S,          typeof(OpCodeSimdReg));
+            SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", InstName.Fmax_V,          InstEmit.Fmax_V,          typeof(OpCodeSimdReg));
+            SetA64("000111100x1xxxxx011010xxxxxxxxxx", InstName.Fmaxnm_S,        InstEmit.Fmaxnm_S,        typeof(OpCodeSimdReg));
+            SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnm_V,        InstEmit.Fmaxnm_V,        typeof(OpCodeSimdReg));
+            SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V,         InstEmit.Fmaxp_V,         typeof(OpCodeSimdReg));
+            SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S,          InstEmit.Fmin_S,          typeof(OpCodeSimdReg));
+            SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", InstName.Fmin_V,          InstEmit.Fmin_V,          typeof(OpCodeSimdReg));
+            SetA64("000111100x1xxxxx011110xxxxxxxxxx", InstName.Fminnm_S,        InstEmit.Fminnm_S,        typeof(OpCodeSimdReg));
+            SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnm_V,        InstEmit.Fminnm_V,        typeof(OpCodeSimdReg));
+            SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V,         InstEmit.Fminp_V,         typeof(OpCodeSimdReg));
+            SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se,         InstEmit.Fmla_Se,         typeof(OpCodeSimdRegElemF));
+            SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", InstName.Fmla_V,          InstEmit.Fmla_V,          typeof(OpCodeSimdReg));
+            SetA64("0>0011111<xxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Ve,         InstEmit.Fmla_Ve,         typeof(OpCodeSimdRegElemF));
+            SetA64("010111111xxxxxxx0101x0xxxxxxxxxx", InstName.Fmls_Se,         InstEmit.Fmls_Se,         typeof(OpCodeSimdRegElemF));
+            SetA64("0>0011101<1xxxxx110011xxxxxxxxxx", InstName.Fmls_V,          InstEmit.Fmls_V,          typeof(OpCodeSimdReg));
+            SetA64("0>0011111<xxxxxx0101x0xxxxxxxxxx", InstName.Fmls_Ve,         InstEmit.Fmls_Ve,         typeof(OpCodeSimdRegElemF));
+            SetA64("000111100x100000010000xxxxxxxxxx", InstName.Fmov_S,          InstEmit.Fmov_S,          typeof(OpCodeSimd));
+            SetA64("000111100x1xxxxxxxx10000000xxxxx", InstName.Fmov_Si,         InstEmit.Fmov_Si,         typeof(OpCodeSimdFmov));
+            SetA64("0x00111100000xxx111101xxxxxxxxxx", InstName.Fmov_Vi,         InstEmit.Fmov_Vi,         typeof(OpCodeSimdImm));
+            SetA64("0110111100000xxx111101xxxxxxxxxx", InstName.Fmov_Vi,         InstEmit.Fmov_Vi,         typeof(OpCodeSimdImm));
+            SetA64("0001111000100110000000xxxxxxxxxx", InstName.Fmov_Ftoi,       InstEmit.Fmov_Ftoi,       typeof(OpCodeSimd));
+            SetA64("1001111001100110000000xxxxxxxxxx", InstName.Fmov_Ftoi,       InstEmit.Fmov_Ftoi,       typeof(OpCodeSimd));
+            SetA64("0001111000100111000000xxxxxxxxxx", InstName.Fmov_Itof,       InstEmit.Fmov_Itof,       typeof(OpCodeSimd));
+            SetA64("1001111001100111000000xxxxxxxxxx", InstName.Fmov_Itof,       InstEmit.Fmov_Itof,       typeof(OpCodeSimd));
+            SetA64("1001111010101110000000xxxxxxxxxx", InstName.Fmov_Ftoi1,      InstEmit.Fmov_Ftoi1,      typeof(OpCodeSimd));
+            SetA64("1001111010101111000000xxxxxxxxxx", InstName.Fmov_Itof1,      InstEmit.Fmov_Itof1,      typeof(OpCodeSimd));
+            SetA64("000111110x0xxxxx1xxxxxxxxxxxxxxx", InstName.Fmsub_S,         InstEmit.Fmsub_S,         typeof(OpCodeSimdReg));
+            SetA64("000111100x1xxxxx000010xxxxxxxxxx", InstName.Fmul_S,          InstEmit.Fmul_S,          typeof(OpCodeSimdReg));
+            SetA64("010111111xxxxxxx1001x0xxxxxxxxxx", InstName.Fmul_Se,         InstEmit.Fmul_Se,         typeof(OpCodeSimdRegElemF));
+            SetA64("0>1011100<1xxxxx110111xxxxxxxxxx", InstName.Fmul_V,          InstEmit.Fmul_V,          typeof(OpCodeSimdReg));
+            SetA64("0>0011111<xxxxxx1001x0xxxxxxxxxx", InstName.Fmul_Ve,         InstEmit.Fmul_Ve,         typeof(OpCodeSimdRegElemF));
+            SetA64("010111100x1xxxxx110111xxxxxxxxxx", InstName.Fmulx_S,         InstEmit.Fmulx_S,         typeof(OpCodeSimdReg));
+            SetA64("011111111xxxxxxx1001x0xxxxxxxxxx", InstName.Fmulx_Se,        InstEmit.Fmulx_Se,        typeof(OpCodeSimdRegElemF));
+            SetA64("0>0011100<1xxxxx110111xxxxxxxxxx", InstName.Fmulx_V,         InstEmit.Fmulx_V,         typeof(OpCodeSimdReg));
+            SetA64("0>1011111<xxxxxx1001x0xxxxxxxxxx", InstName.Fmulx_Ve,        InstEmit.Fmulx_Ve,        typeof(OpCodeSimdRegElemF));
+            SetA64("000111100x100001010000xxxxxxxxxx", InstName.Fneg_S,          InstEmit.Fneg_S,          typeof(OpCodeSimd));
+            SetA64("0>1011101<100000111110xxxxxxxxxx", InstName.Fneg_V,          InstEmit.Fneg_V,          typeof(OpCodeSimd));
+            SetA64("000111110x1xxxxx0xxxxxxxxxxxxxxx", InstName.Fnmadd_S,        InstEmit.Fnmadd_S,        typeof(OpCodeSimdReg));
+            SetA64("000111110x1xxxxx1xxxxxxxxxxxxxxx", InstName.Fnmsub_S,        InstEmit.Fnmsub_S,        typeof(OpCodeSimdReg));
+            SetA64("000111100x1xxxxx100010xxxxxxxxxx", InstName.Fnmul_S,         InstEmit.Fnmul_S,         typeof(OpCodeSimdReg));
+            SetA64("010111101x100001110110xxxxxxxxxx", InstName.Frecpe_S,        InstEmit.Frecpe_S,        typeof(OpCodeSimd));
+            SetA64("0>0011101<100001110110xxxxxxxxxx", InstName.Frecpe_V,        InstEmit.Frecpe_V,        typeof(OpCodeSimd));
+            SetA64("010111100x1xxxxx111111xxxxxxxxxx", InstName.Frecps_S,        InstEmit.Frecps_S,        typeof(OpCodeSimdReg));
+            SetA64("0>0011100<1xxxxx111111xxxxxxxxxx", InstName.Frecps_V,        InstEmit.Frecps_V,        typeof(OpCodeSimdReg));
+            SetA64("010111101x100001111110xxxxxxxxxx", InstName.Frecpx_S,        InstEmit.Frecpx_S,        typeof(OpCodeSimd));
+            SetA64("000111100x100110010000xxxxxxxxxx", InstName.Frinta_S,        InstEmit.Frinta_S,        typeof(OpCodeSimd));
+            SetA64("0>1011100<100001100010xxxxxxxxxx", InstName.Frinta_V,        InstEmit.Frinta_V,        typeof(OpCodeSimd));
+            SetA64("000111100x100111110000xxxxxxxxxx", InstName.Frinti_S,        InstEmit.Frinti_S,        typeof(OpCodeSimd));
+            SetA64("0>1011101<100001100110xxxxxxxxxx", InstName.Frinti_V,        InstEmit.Frinti_V,        typeof(OpCodeSimd));
+            SetA64("000111100x100101010000xxxxxxxxxx", InstName.Frintm_S,        InstEmit.Frintm_S,        typeof(OpCodeSimd));
+            SetA64("0>0011100<100001100110xxxxxxxxxx", InstName.Frintm_V,        InstEmit.Frintm_V,        typeof(OpCodeSimd));
+            SetA64("000111100x100100010000xxxxxxxxxx", InstName.Frintn_S,        InstEmit.Frintn_S,        typeof(OpCodeSimd));
+            SetA64("0>0011100<100001100010xxxxxxxxxx", InstName.Frintn_V,        InstEmit.Frintn_V,        typeof(OpCodeSimd));
+            SetA64("000111100x100100110000xxxxxxxxxx", InstName.Frintp_S,        InstEmit.Frintp_S,        typeof(OpCodeSimd));
+            SetA64("0>0011101<100001100010xxxxxxxxxx", InstName.Frintp_V,        InstEmit.Frintp_V,        typeof(OpCodeSimd));
+            SetA64("000111100x100111010000xxxxxxxxxx", InstName.Frintx_S,        InstEmit.Frintx_S,        typeof(OpCodeSimd));
+            SetA64("0>1011100<100001100110xxxxxxxxxx", InstName.Frintx_V,        InstEmit.Frintx_V,        typeof(OpCodeSimd));
+            SetA64("000111100x100101110000xxxxxxxxxx", InstName.Frintz_S,        InstEmit.Frintz_S,        typeof(OpCodeSimd));
+            SetA64("0>0011101<100001100110xxxxxxxxxx", InstName.Frintz_V,        InstEmit.Frintz_V,        typeof(OpCodeSimd));
+            SetA64("011111101x100001110110xxxxxxxxxx", InstName.Frsqrte_S,       InstEmit.Frsqrte_S,       typeof(OpCodeSimd));
+            SetA64("0>1011101<100001110110xxxxxxxxxx", InstName.Frsqrte_V,       InstEmit.Frsqrte_V,       typeof(OpCodeSimd));
+            SetA64("010111101x1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_S,       InstEmit.Frsqrts_S,       typeof(OpCodeSimdReg));
+            SetA64("0>0011101<1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_V,       InstEmit.Frsqrts_V,       typeof(OpCodeSimdReg));
+            SetA64("000111100x100001110000xxxxxxxxxx", InstName.Fsqrt_S,         InstEmit.Fsqrt_S,         typeof(OpCodeSimd));
+            SetA64("0>1011101<100001111110xxxxxxxxxx", InstName.Fsqrt_V,         InstEmit.Fsqrt_V,         typeof(OpCodeSimd));
+            SetA64("000111100x1xxxxx001110xxxxxxxxxx", InstName.Fsub_S,          InstEmit.Fsub_S,          typeof(OpCodeSimdReg));
+            SetA64("0>0011101<1xxxxx110101xxxxxxxxxx", InstName.Fsub_V,          InstEmit.Fsub_V,          typeof(OpCodeSimdReg));
+            SetA64("01001110000xxxxx000111xxxxxxxxxx", InstName.Ins_Gp,          InstEmit.Ins_Gp,          typeof(OpCodeSimdIns));
+            SetA64("01101110000xxxxx0xxxx1xxxxxxxxxx", InstName.Ins_V,           InstEmit.Ins_V,           typeof(OpCodeSimdIns));
+            SetA64("0x00110001000000xxxxxxxxxxxxxxxx", InstName.Ld__Vms,         InstEmit.Ld__Vms,         typeof(OpCodeSimdMemMs));
+            SetA64("0x001100110xxxxxxxxxxxxxxxxxxxxx", InstName.Ld__Vms,         InstEmit.Ld__Vms,         typeof(OpCodeSimdMemMs));
+            SetA64("0x00110101x00000xxxxxxxxxxxxxxxx", InstName.Ld__Vss,         InstEmit.Ld__Vss,         typeof(OpCodeSimdMemSs));
+            SetA64("0x00110111xxxxxxxxxxxxxxxxxxxxxx", InstName.Ld__Vss,         InstEmit.Ld__Vss,         typeof(OpCodeSimdMemSs));
+            SetA64("xx10110xx1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldp,             InstEmit.Ldp,             typeof(OpCodeSimdMemPair));
+            SetA64("xx111100x10xxxxxxxxx00xxxxxxxxxx", InstName.Ldr,             InstEmit.Ldr,             typeof(OpCodeSimdMemImm));
+            SetA64("xx111100x10xxxxxxxxx01xxxxxxxxxx", InstName.Ldr,             InstEmit.Ldr,             typeof(OpCodeSimdMemImm));
+            SetA64("xx111100x10xxxxxxxxx11xxxxxxxxxx", InstName.Ldr,             InstEmit.Ldr,             typeof(OpCodeSimdMemImm));
+            SetA64("xx111101x1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr,             InstEmit.Ldr,             typeof(OpCodeSimdMemImm));
+            SetA64("xx111100x11xxxxxxxxx10xxxxxxxxxx", InstName.Ldr,             InstEmit.Ldr,             typeof(OpCodeSimdMemReg));
+            SetA64("xx011100xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr_Literal,     InstEmit.Ldr_Literal,     typeof(OpCodeSimdMemLit));
+            SetA64("0x001110<<1xxxxx100101xxxxxxxxxx", InstName.Mla_V,           InstEmit.Mla_V,           typeof(OpCodeSimdReg));
+            SetA64("0x101111xxxxxxxx0000x0xxxxxxxxxx", InstName.Mla_Ve,          InstEmit.Mla_Ve,          typeof(OpCodeSimdRegElem));
+            SetA64("0x101110<<1xxxxx100101xxxxxxxxxx", InstName.Mls_V,           InstEmit.Mls_V,           typeof(OpCodeSimdReg));
+            SetA64("0x101111xxxxxxxx0100x0xxxxxxxxxx", InstName.Mls_Ve,          InstEmit.Mls_Ve,          typeof(OpCodeSimdRegElem));
+            SetA64("0x00111100000xxx0xx001xxxxxxxxxx", InstName.Movi_V,          InstEmit.Movi_V,          typeof(OpCodeSimdImm));
+            SetA64("0x00111100000xxx10x001xxxxxxxxxx", InstName.Movi_V,          InstEmit.Movi_V,          typeof(OpCodeSimdImm));
+            SetA64("0x00111100000xxx110x01xxxxxxxxxx", InstName.Movi_V,          InstEmit.Movi_V,          typeof(OpCodeSimdImm));
+            SetA64("0xx0111100000xxx111001xxxxxxxxxx", InstName.Movi_V,          InstEmit.Movi_V,          typeof(OpCodeSimdImm));
+            SetA64("0x001110<<1xxxxx100111xxxxxxxxxx", InstName.Mul_V,           InstEmit.Mul_V,           typeof(OpCodeSimdReg));
+            SetA64("0x001111xxxxxxxx1000x0xxxxxxxxxx", InstName.Mul_Ve,          InstEmit.Mul_Ve,          typeof(OpCodeSimdRegElem));
+            SetA64("0x10111100000xxx0xx001xxxxxxxxxx", InstName.Mvni_V,          InstEmit.Mvni_V,          typeof(OpCodeSimdImm));
+            SetA64("0x10111100000xxx10x001xxxxxxxxxx", InstName.Mvni_V,          InstEmit.Mvni_V,          typeof(OpCodeSimdImm));
+            SetA64("0x10111100000xxx110x01xxxxxxxxxx", InstName.Mvni_V,          InstEmit.Mvni_V,          typeof(OpCodeSimdImm));
+            SetA64("0111111011100000101110xxxxxxxxxx", InstName.Neg_S,           InstEmit.Neg_S,           typeof(OpCodeSimd));
+            SetA64("0>101110<<100000101110xxxxxxxxxx", InstName.Neg_V,           InstEmit.Neg_V,           typeof(OpCodeSimd));
+            SetA64("0x10111000100000010110xxxxxxxxxx", InstName.Not_V,           InstEmit.Not_V,           typeof(OpCodeSimd));
+            SetA64("0x001110111xxxxx000111xxxxxxxxxx", InstName.Orn_V,           InstEmit.Orn_V,           typeof(OpCodeSimdReg));
+            SetA64("0x001110101xxxxx000111xxxxxxxxxx", InstName.Orr_V,           InstEmit.Orr_V,           typeof(OpCodeSimdReg));
+            SetA64("0x00111100000xxx0xx101xxxxxxxxxx", InstName.Orr_Vi,          InstEmit.Orr_Vi,          typeof(OpCodeSimdImm));
+            SetA64("0x00111100000xxx10x101xxxxxxxxxx", InstName.Orr_Vi,          InstEmit.Orr_Vi,          typeof(OpCodeSimdImm));
+            SetA64("0x101110<<1xxxxx010000xxxxxxxxxx", InstName.Raddhn_V,        InstEmit.Raddhn_V,        typeof(OpCodeSimdReg));
+            SetA64("0x10111001100000010110xxxxxxxxxx", InstName.Rbit_V,          InstEmit.Rbit_V,          typeof(OpCodeSimd));
+            SetA64("0x00111000100000000110xxxxxxxxxx", InstName.Rev16_V,         InstEmit.Rev16_V,         typeof(OpCodeSimd));
+            SetA64("0x1011100x100000000010xxxxxxxxxx", InstName.Rev32_V,         InstEmit.Rev32_V,         typeof(OpCodeSimd));
+            SetA64("0x001110<<100000000010xxxxxxxxxx", InstName.Rev64_V,         InstEmit.Rev64_V,         typeof(OpCodeSimd));
+            SetA64("0x00111100>>>xxx100011xxxxxxxxxx", InstName.Rshrn_V,         InstEmit.Rshrn_V,         typeof(OpCodeSimdShImm));
+            SetA64("0x101110<<1xxxxx011000xxxxxxxxxx", InstName.Rsubhn_V,        InstEmit.Rsubhn_V,        typeof(OpCodeSimdReg));
+            SetA64("0x001110<<1xxxxx011111xxxxxxxxxx", InstName.Saba_V,          InstEmit.Saba_V,          typeof(OpCodeSimdReg));
+            SetA64("0x001110<<1xxxxx010100xxxxxxxxxx", InstName.Sabal_V,         InstEmit.Sabal_V,         typeof(OpCodeSimdReg));
+            SetA64("0x001110<<1xxxxx011101xxxxxxxxxx", InstName.Sabd_V,          InstEmit.Sabd_V,          typeof(OpCodeSimdReg));
+            SetA64("0x001110<<1xxxxx011100xxxxxxxxxx", InstName.Sabdl_V,         InstEmit.Sabdl_V,         typeof(OpCodeSimdReg));
+            SetA64("0x001110<<100000011010xxxxxxxxxx", InstName.Sadalp_V,        InstEmit.Sadalp_V,        typeof(OpCodeSimd));
+            SetA64("0x001110<<1xxxxx000000xxxxxxxxxx", InstName.Saddl_V,         InstEmit.Saddl_V,         typeof(OpCodeSimdReg));
+            SetA64("0x001110<<100000001010xxxxxxxxxx", InstName.Saddlp_V,        InstEmit.Saddlp_V,        typeof(OpCodeSimd));
+            SetA64("000011100x110000001110xxxxxxxxxx", InstName.Saddlv_V,        InstEmit.Saddlv_V,        typeof(OpCodeSimd));
+            SetA64("01001110<<110000001110xxxxxxxxxx", InstName.Saddlv_V,        InstEmit.Saddlv_V,        typeof(OpCodeSimd));
+            SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstName.Saddw_V,         InstEmit.Saddw_V,         typeof(OpCodeSimdReg));
+            SetA64("x00111100x100010000000xxxxxxxxxx", InstName.Scvtf_Gp,        InstEmit.Scvtf_Gp,        typeof(OpCodeSimdCvt));
+            SetA64(">00111100x000010>xxxxxxxxxxxxxxx", InstName.Scvtf_Gp_Fixed,  InstEmit.Scvtf_Gp_Fixed,  typeof(OpCodeSimdCvt));
+            SetA64("010111100x100001110110xxxxxxxxxx", InstName.Scvtf_S,         InstEmit.Scvtf_S,         typeof(OpCodeSimd));
+            SetA64("0>0011100<100001110110xxxxxxxxxx", InstName.Scvtf_V,         InstEmit.Scvtf_V,         typeof(OpCodeSimd));
+            SetA64("0x001111001xxxxx111001xxxxxxxxxx", InstName.Scvtf_V_Fixed,   InstEmit.Scvtf_V_Fixed,   typeof(OpCodeSimdShImm));
+            SetA64("0100111101xxxxxx111001xxxxxxxxxx", InstName.Scvtf_V_Fixed,   InstEmit.Scvtf_V_Fixed,   typeof(OpCodeSimdShImm));
+            SetA64("01011110000xxxxx000000xxxxxxxxxx", InstName.Sha1c_V,         InstEmit.Sha1c_V,         typeof(OpCodeSimdReg));
+            SetA64("0101111000101000000010xxxxxxxxxx", InstName.Sha1h_V,         InstEmit.Sha1h_V,         typeof(OpCodeSimd));
+            SetA64("01011110000xxxxx001000xxxxxxxxxx", InstName.Sha1m_V,         InstEmit.Sha1m_V,         typeof(OpCodeSimdReg));
+            SetA64("01011110000xxxxx000100xxxxxxxxxx", InstName.Sha1p_V,         InstEmit.Sha1p_V,         typeof(OpCodeSimdReg));
+            SetA64("01011110000xxxxx001100xxxxxxxxxx", InstName.Sha1su0_V,       InstEmit.Sha1su0_V,       typeof(OpCodeSimdReg));
+            SetA64("0101111000101000000110xxxxxxxxxx", InstName.Sha1su1_V,       InstEmit.Sha1su1_V,       typeof(OpCodeSimd));
+            SetA64("01011110000xxxxx010000xxxxxxxxxx", InstName.Sha256h_V,       InstEmit.Sha256h_V,       typeof(OpCodeSimdReg));
+            SetA64("01011110000xxxxx010100xxxxxxxxxx", InstName.Sha256h2_V,      InstEmit.Sha256h2_V,      typeof(OpCodeSimdReg));
+            SetA64("0101111000101000001010xxxxxxxxxx", InstName.Sha256su0_V,     InstEmit.Sha256su0_V,     typeof(OpCodeSimd));
+            SetA64("01011110000xxxxx011000xxxxxxxxxx", InstName.Sha256su1_V,     InstEmit.Sha256su1_V,     typeof(OpCodeSimdReg));
+            SetA64("0x001110<<1xxxxx000001xxxxxxxxxx", InstName.Shadd_V,         InstEmit.Shadd_V,         typeof(OpCodeSimdReg));
+            SetA64("0101111101xxxxxx010101xxxxxxxxxx", InstName.Shl_S,           InstEmit.Shl_S,           typeof(OpCodeSimdShImm));
+            SetA64("0x00111100>>>xxx010101xxxxxxxxxx", InstName.Shl_V,           InstEmit.Shl_V,           typeof(OpCodeSimdShImm));
+            SetA64("0100111101xxxxxx010101xxxxxxxxxx", InstName.Shl_V,           InstEmit.Shl_V,           typeof(OpCodeSimdShImm));
+            SetA64("0x101110<<100001001110xxxxxxxxxx", InstName.Shll_V,          InstEmit.Shll_V,          typeof(OpCodeSimd));
+            SetA64("0x00111100>>>xxx100001xxxxxxxxxx", InstName.Shrn_V,          InstEmit.Shrn_V,          typeof(OpCodeSimdShImm));
+            SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", InstName.Shsub_V,         InstEmit.Shsub_V,         typeof(OpCodeSimdReg));
+            SetA64("0x10111100>>>xxx010101xxxxxxxxxx", InstName.Sli_V,           InstEmit.Sli_V,           typeof(OpCodeSimdShImm));
+            SetA64("0110111101xxxxxx010101xxxxxxxxxx", InstName.Sli_V,           InstEmit.Sli_V,           typeof(OpCodeSimdShImm));
+            SetA64("0x001110<<1xxxxx011001xxxxxxxxxx", InstName.Smax_V,          InstEmit.Smax_V,          typeof(OpCodeSimdReg));
+            SetA64("0x001110<<1xxxxx101001xxxxxxxxxx", InstName.Smaxp_V,         InstEmit.Smaxp_V,         typeof(OpCodeSimdReg));
+            SetA64("000011100x110000101010xxxxxxxxxx", InstName.Smaxv_V,         InstEmit.Smaxv_V,         typeof(OpCodeSimd));
+            SetA64("01001110<<110000101010xxxxxxxxxx", InstName.Smaxv_V,         InstEmit.Smaxv_V,         typeof(OpCodeSimd));
+            SetA64("0x001110<<1xxxxx011011xxxxxxxxxx", InstName.Smin_V,          InstEmit.Smin_V,          typeof(OpCodeSimdReg));
+            SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", InstName.Sminp_V,         InstEmit.Sminp_V,         typeof(OpCodeSimdReg));
+            SetA64("000011100x110001101010xxxxxxxxxx", InstName.Sminv_V,         InstEmit.Sminv_V,         typeof(OpCodeSimd));
+            SetA64("01001110<<110001101010xxxxxxxxxx", InstName.Sminv_V,         InstEmit.Sminv_V,         typeof(OpCodeSimd));
+            SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", InstName.Smlal_V,         InstEmit.Smlal_V,         typeof(OpCodeSimdReg));
+            SetA64("0x001111xxxxxxxx0010x0xxxxxxxxxx", InstName.Smlal_Ve,        InstEmit.Smlal_Ve,        typeof(OpCodeSimdRegElem));
+            SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", InstName.Smlsl_V,         InstEmit.Smlsl_V,         typeof(OpCodeSimdReg));
+            SetA64("0x001111xxxxxxxx0110x0xxxxxxxxxx", InstName.Smlsl_Ve,        InstEmit.Smlsl_Ve,        typeof(OpCodeSimdRegElem));
+            SetA64("0x001110000xxxxx001011xxxxxxxxxx", InstName.Smov_S,          InstEmit.Smov_S,          typeof(OpCodeSimdIns));
+            SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", InstName.Smull_V,         InstEmit.Smull_V,         typeof(OpCodeSimdReg));
+            SetA64("0x001111xxxxxxxx1010x0xxxxxxxxxx", InstName.Smull_Ve,        InstEmit.Smull_Ve,        typeof(OpCodeSimdRegElem));
+            SetA64("01011110xx100000011110xxxxxxxxxx", InstName.Sqabs_S,         InstEmit.Sqabs_S,         typeof(OpCodeSimd));
+            SetA64("0>001110<<100000011110xxxxxxxxxx", InstName.Sqabs_V,         InstEmit.Sqabs_V,         typeof(OpCodeSimd));
+            SetA64("01011110xx1xxxxx000011xxxxxxxxxx", InstName.Sqadd_S,         InstEmit.Sqadd_S,         typeof(OpCodeSimdReg));
+            SetA64("0>001110<<1xxxxx000011xxxxxxxxxx", InstName.Sqadd_V,         InstEmit.Sqadd_V,         typeof(OpCodeSimdReg));
+            SetA64("01011110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S,       InstEmit.Sqdmulh_S,       typeof(OpCodeSimdReg));
+            SetA64("01011110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S,       InstEmit.Sqdmulh_S,       typeof(OpCodeSimdReg));
+            SetA64("0x001110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V,       InstEmit.Sqdmulh_V,       typeof(OpCodeSimdReg));
+            SetA64("0x001110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V,       InstEmit.Sqdmulh_V,       typeof(OpCodeSimdReg));
+            SetA64("01111110xx100000011110xxxxxxxxxx", InstName.Sqneg_S,         InstEmit.Sqneg_S,         typeof(OpCodeSimd));
+            SetA64("0>101110<<100000011110xxxxxxxxxx", InstName.Sqneg_V,         InstEmit.Sqneg_V,         typeof(OpCodeSimd));
+            SetA64("01111110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S,      InstEmit.Sqrdmulh_S,      typeof(OpCodeSimdReg));
+            SetA64("01111110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S,      InstEmit.Sqrdmulh_S,      typeof(OpCodeSimdReg));
+            SetA64("0x101110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V,      InstEmit.Sqrdmulh_V,      typeof(OpCodeSimdReg));
+            SetA64("0x101110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V,      InstEmit.Sqrdmulh_V,      typeof(OpCodeSimdReg));
+            SetA64("0>001110<<1xxxxx010111xxxxxxxxxx", InstName.Sqrshl_V,        InstEmit.Sqrshl_V,        typeof(OpCodeSimdReg));
+            SetA64("0101111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_S,       InstEmit.Sqrshrn_S,       typeof(OpCodeSimdShImm));
+            SetA64("0x00111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_V,       InstEmit.Sqrshrn_V,       typeof(OpCodeSimdShImm));
+            SetA64("0111111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_S,      InstEmit.Sqrshrun_S,      typeof(OpCodeSimdShImm));
+            SetA64("0x10111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_V,      InstEmit.Sqrshrun_V,      typeof(OpCodeSimdShImm));
+            SetA64("0>001110<<1xxxxx010011xxxxxxxxxx", InstName.Sqshl_V,         InstEmit.Sqshl_V,         typeof(OpCodeSimdReg));
+            SetA64("0101111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_S,        InstEmit.Sqshrn_S,        typeof(OpCodeSimdShImm));
+            SetA64("0x00111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_V,        InstEmit.Sqshrn_V,        typeof(OpCodeSimdShImm));
+            SetA64("0111111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_S,       InstEmit.Sqshrun_S,       typeof(OpCodeSimdShImm));
+            SetA64("0x10111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_V,       InstEmit.Sqshrun_V,       typeof(OpCodeSimdShImm));
+            SetA64("01011110xx1xxxxx001011xxxxxxxxxx", InstName.Sqsub_S,         InstEmit.Sqsub_S,         typeof(OpCodeSimdReg));
+            SetA64("0>001110<<1xxxxx001011xxxxxxxxxx", InstName.Sqsub_V,         InstEmit.Sqsub_V,         typeof(OpCodeSimdReg));
+            SetA64("01011110<<100001010010xxxxxxxxxx", InstName.Sqxtn_S,         InstEmit.Sqxtn_S,         typeof(OpCodeSimd));
+            SetA64("0x001110<<100001010010xxxxxxxxxx", InstName.Sqxtn_V,         InstEmit.Sqxtn_V,         typeof(OpCodeSimd));
+            SetA64("01111110<<100001001010xxxxxxxxxx", InstName.Sqxtun_S,        InstEmit.Sqxtun_S,        typeof(OpCodeSimd));
+            SetA64("0x101110<<100001001010xxxxxxxxxx", InstName.Sqxtun_V,        InstEmit.Sqxtun_V,        typeof(OpCodeSimd));
+            SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", InstName.Srhadd_V,        InstEmit.Srhadd_V,        typeof(OpCodeSimdReg));
+            SetA64("0>001110<<1xxxxx010101xxxxxxxxxx", InstName.Srshl_V,         InstEmit.Srshl_V,         typeof(OpCodeSimdReg));
+            SetA64("0101111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_S,         InstEmit.Srshr_S,         typeof(OpCodeSimdShImm));
+            SetA64("0x00111100>>>xxx001001xxxxxxxxxx", InstName.Srshr_V,         InstEmit.Srshr_V,         typeof(OpCodeSimdShImm));
+            SetA64("0100111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_V,         InstEmit.Srshr_V,         typeof(OpCodeSimdShImm));
+            SetA64("0101111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_S,         InstEmit.Srsra_S,         typeof(OpCodeSimdShImm));
+            SetA64("0x00111100>>>xxx001101xxxxxxxxxx", InstName.Srsra_V,         InstEmit.Srsra_V,         typeof(OpCodeSimdShImm));
+            SetA64("0100111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_V,         InstEmit.Srsra_V,         typeof(OpCodeSimdShImm));
+            SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", InstName.Sshl_V,          InstEmit.Sshl_V,          typeof(OpCodeSimdReg));
+            SetA64("0x00111100>>>xxx101001xxxxxxxxxx", InstName.Sshll_V,         InstEmit.Sshll_V,         typeof(OpCodeSimdShImm));
+            SetA64("0101111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_S,          InstEmit.Sshr_S,          typeof(OpCodeSimdShImm));
+            SetA64("0x00111100>>>xxx000001xxxxxxxxxx", InstName.Sshr_V,          InstEmit.Sshr_V,          typeof(OpCodeSimdShImm));
+            SetA64("0100111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_V,          InstEmit.Sshr_V,          typeof(OpCodeSimdShImm));
+            SetA64("0101111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_S,          InstEmit.Ssra_S,          typeof(OpCodeSimdShImm));
+            SetA64("0x00111100>>>xxx000101xxxxxxxxxx", InstName.Ssra_V,          InstEmit.Ssra_V,          typeof(OpCodeSimdShImm));
+            SetA64("0100111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_V,          InstEmit.Ssra_V,          typeof(OpCodeSimdShImm));
+            SetA64("0x001110<<1xxxxx001000xxxxxxxxxx", InstName.Ssubl_V,         InstEmit.Ssubl_V,         typeof(OpCodeSimdReg));
+            SetA64("0x001110<<1xxxxx001100xxxxxxxxxx", InstName.Ssubw_V,         InstEmit.Ssubw_V,         typeof(OpCodeSimdReg));
+            SetA64("0x00110000000000xxxxxxxxxxxxxxxx", InstName.St__Vms,         InstEmit.St__Vms,         typeof(OpCodeSimdMemMs));
+            SetA64("0x001100100xxxxxxxxxxxxxxxxxxxxx", InstName.St__Vms,         InstEmit.St__Vms,         typeof(OpCodeSimdMemMs));
+            SetA64("0x00110100x00000xxxxxxxxxxxxxxxx", InstName.St__Vss,         InstEmit.St__Vss,         typeof(OpCodeSimdMemSs));
+            SetA64("0x00110110xxxxxxxxxxxxxxxxxxxxxx", InstName.St__Vss,         InstEmit.St__Vss,         typeof(OpCodeSimdMemSs));
+            SetA64("xx10110xx0xxxxxxxxxxxxxxxxxxxxxx", InstName.Stp,             InstEmit.Stp,             typeof(OpCodeSimdMemPair));
+            SetA64("xx111100x00xxxxxxxxx00xxxxxxxxxx", InstName.Str,             InstEmit.Str,             typeof(OpCodeSimdMemImm));
+            SetA64("xx111100x00xxxxxxxxx01xxxxxxxxxx", InstName.Str,             InstEmit.Str,             typeof(OpCodeSimdMemImm));
+            SetA64("xx111100x00xxxxxxxxx11xxxxxxxxxx", InstName.Str,             InstEmit.Str,             typeof(OpCodeSimdMemImm));
+            SetA64("xx111101x0xxxxxxxxxxxxxxxxxxxxxx", InstName.Str,             InstEmit.Str,             typeof(OpCodeSimdMemImm));
+            SetA64("xx111100x01xxxxxxxxx10xxxxxxxxxx", InstName.Str,             InstEmit.Str,             typeof(OpCodeSimdMemReg));
+            SetA64("01111110111xxxxx100001xxxxxxxxxx", InstName.Sub_S,           InstEmit.Sub_S,           typeof(OpCodeSimdReg));
+            SetA64("0>101110<<1xxxxx100001xxxxxxxxxx", InstName.Sub_V,           InstEmit.Sub_V,           typeof(OpCodeSimdReg));
+            SetA64("0x001110<<1xxxxx011000xxxxxxxxxx", InstName.Subhn_V,         InstEmit.Subhn_V,         typeof(OpCodeSimdReg));
+            SetA64("01011110xx100000001110xxxxxxxxxx", InstName.Suqadd_S,        InstEmit.Suqadd_S,        typeof(OpCodeSimd));
+            SetA64("0>001110<<100000001110xxxxxxxxxx", InstName.Suqadd_V,        InstEmit.Suqadd_V,        typeof(OpCodeSimd));
+            SetA64("0x001110000xxxxx0xx000xxxxxxxxxx", InstName.Tbl_V,           InstEmit.Tbl_V,           typeof(OpCodeSimdTbl));
+            SetA64("0>001110<<0xxxxx001010xxxxxxxxxx", InstName.Trn1_V,          InstEmit.Trn1_V,          typeof(OpCodeSimdReg));
+            SetA64("0>001110<<0xxxxx011010xxxxxxxxxx", InstName.Trn2_V,          InstEmit.Trn2_V,          typeof(OpCodeSimdReg));
+            SetA64("0x101110<<1xxxxx011111xxxxxxxxxx", InstName.Uaba_V,          InstEmit.Uaba_V,          typeof(OpCodeSimdReg));
+            SetA64("0x101110<<1xxxxx010100xxxxxxxxxx", InstName.Uabal_V,         InstEmit.Uabal_V,         typeof(OpCodeSimdReg));
+            SetA64("0x101110<<1xxxxx011101xxxxxxxxxx", InstName.Uabd_V,          InstEmit.Uabd_V,          typeof(OpCodeSimdReg));
+            SetA64("0x101110<<1xxxxx011100xxxxxxxxxx", InstName.Uabdl_V,         InstEmit.Uabdl_V,         typeof(OpCodeSimdReg));
+            SetA64("0x101110<<100000011010xxxxxxxxxx", InstName.Uadalp_V,        InstEmit.Uadalp_V,        typeof(OpCodeSimd));
+            SetA64("0x101110<<1xxxxx000000xxxxxxxxxx", InstName.Uaddl_V,         InstEmit.Uaddl_V,         typeof(OpCodeSimdReg));
+            SetA64("0x101110<<100000001010xxxxxxxxxx", InstName.Uaddlp_V,        InstEmit.Uaddlp_V,        typeof(OpCodeSimd));
+            SetA64("001011100x110000001110xxxxxxxxxx", InstName.Uaddlv_V,        InstEmit.Uaddlv_V,        typeof(OpCodeSimd));
+            SetA64("01101110<<110000001110xxxxxxxxxx", InstName.Uaddlv_V,        InstEmit.Uaddlv_V,        typeof(OpCodeSimd));
+            SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstName.Uaddw_V,         InstEmit.Uaddw_V,         typeof(OpCodeSimdReg));
+            SetA64("x00111100x100011000000xxxxxxxxxx", InstName.Ucvtf_Gp,        InstEmit.Ucvtf_Gp,        typeof(OpCodeSimdCvt));
+            SetA64(">00111100x000011>xxxxxxxxxxxxxxx", InstName.Ucvtf_Gp_Fixed,  InstEmit.Ucvtf_Gp_Fixed,  typeof(OpCodeSimdCvt));
+            SetA64("011111100x100001110110xxxxxxxxxx", InstName.Ucvtf_S,         InstEmit.Ucvtf_S,         typeof(OpCodeSimd));
+            SetA64("0>1011100<100001110110xxxxxxxxxx", InstName.Ucvtf_V,         InstEmit.Ucvtf_V,         typeof(OpCodeSimd));
+            SetA64("0x101111001xxxxx111001xxxxxxxxxx", InstName.Ucvtf_V_Fixed,   InstEmit.Ucvtf_V_Fixed,   typeof(OpCodeSimdShImm));
+            SetA64("0110111101xxxxxx111001xxxxxxxxxx", InstName.Ucvtf_V_Fixed,   InstEmit.Ucvtf_V_Fixed,   typeof(OpCodeSimdShImm));
+            SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstName.Uhadd_V,         InstEmit.Uhadd_V,         typeof(OpCodeSimdReg));
+            SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", InstName.Uhsub_V,         InstEmit.Uhsub_V,         typeof(OpCodeSimdReg));
+            SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", InstName.Umax_V,          InstEmit.Umax_V,          typeof(OpCodeSimdReg));
+            SetA64("0x101110<<1xxxxx101001xxxxxxxxxx", InstName.Umaxp_V,         InstEmit.Umaxp_V,         typeof(OpCodeSimdReg));
+            SetA64("001011100x110000101010xxxxxxxxxx", InstName.Umaxv_V,         InstEmit.Umaxv_V,         typeof(OpCodeSimd));
+            SetA64("01101110<<110000101010xxxxxxxxxx", InstName.Umaxv_V,         InstEmit.Umaxv_V,         typeof(OpCodeSimd));
+            SetA64("0x101110<<1xxxxx011011xxxxxxxxxx", InstName.Umin_V,          InstEmit.Umin_V,          typeof(OpCodeSimdReg));
+            SetA64("0x101110<<1xxxxx101011xxxxxxxxxx", InstName.Uminp_V,         InstEmit.Uminp_V,         typeof(OpCodeSimdReg));
+            SetA64("001011100x110001101010xxxxxxxxxx", InstName.Uminv_V,         InstEmit.Uminv_V,         typeof(OpCodeSimd));
+            SetA64("01101110<<110001101010xxxxxxxxxx", InstName.Uminv_V,         InstEmit.Uminv_V,         typeof(OpCodeSimd));
+            SetA64("0x101110<<1xxxxx100000xxxxxxxxxx", InstName.Umlal_V,         InstEmit.Umlal_V,         typeof(OpCodeSimdReg));
+            SetA64("0x101111xxxxxxxx0010x0xxxxxxxxxx", InstName.Umlal_Ve,        InstEmit.Umlal_Ve,        typeof(OpCodeSimdRegElem));
+            SetA64("0x101110<<1xxxxx101000xxxxxxxxxx", InstName.Umlsl_V,         InstEmit.Umlsl_V,         typeof(OpCodeSimdReg));
+            SetA64("0x101111xxxxxxxx0110x0xxxxxxxxxx", InstName.Umlsl_Ve,        InstEmit.Umlsl_Ve,        typeof(OpCodeSimdRegElem));
+            SetA64("0x001110000xxxxx001111xxxxxxxxxx", InstName.Umov_S,          InstEmit.Umov_S,          typeof(OpCodeSimdIns));
+            SetA64("0x101110<<1xxxxx110000xxxxxxxxxx", InstName.Umull_V,         InstEmit.Umull_V,         typeof(OpCodeSimdReg));
+            SetA64("0x101111xxxxxxxx1010x0xxxxxxxxxx", InstName.Umull_Ve,        InstEmit.Umull_Ve,        typeof(OpCodeSimdRegElem));
+            SetA64("01111110xx1xxxxx000011xxxxxxxxxx", InstName.Uqadd_S,         InstEmit.Uqadd_S,         typeof(OpCodeSimdReg));
+            SetA64("0>101110<<1xxxxx000011xxxxxxxxxx", InstName.Uqadd_V,         InstEmit.Uqadd_V,         typeof(OpCodeSimdReg));
+            SetA64("0>101110<<1xxxxx010111xxxxxxxxxx", InstName.Uqrshl_V,        InstEmit.Uqrshl_V,        typeof(OpCodeSimdReg));
+            SetA64("0111111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_S,       InstEmit.Uqrshrn_S,       typeof(OpCodeSimdShImm));
+            SetA64("0x10111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_V,       InstEmit.Uqrshrn_V,       typeof(OpCodeSimdShImm));
+            SetA64("0>101110<<1xxxxx010011xxxxxxxxxx", InstName.Uqshl_V,         InstEmit.Uqshl_V,         typeof(OpCodeSimdReg));
+            SetA64("0111111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_S,        InstEmit.Uqshrn_S,        typeof(OpCodeSimdShImm));
+            SetA64("0x10111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_V,        InstEmit.Uqshrn_V,        typeof(OpCodeSimdShImm));
+            SetA64("01111110xx1xxxxx001011xxxxxxxxxx", InstName.Uqsub_S,         InstEmit.Uqsub_S,         typeof(OpCodeSimdReg));
+            SetA64("0>101110<<1xxxxx001011xxxxxxxxxx", InstName.Uqsub_V,         InstEmit.Uqsub_V,         typeof(OpCodeSimdReg));
+            SetA64("01111110<<100001010010xxxxxxxxxx", InstName.Uqxtn_S,         InstEmit.Uqxtn_S,         typeof(OpCodeSimd));
+            SetA64("0x101110<<100001010010xxxxxxxxxx", InstName.Uqxtn_V,         InstEmit.Uqxtn_V,         typeof(OpCodeSimd));
+            SetA64("0x101110<<1xxxxx000101xxxxxxxxxx", InstName.Urhadd_V,        InstEmit.Urhadd_V,        typeof(OpCodeSimdReg));
+            SetA64("0>101110<<1xxxxx010101xxxxxxxxxx", InstName.Urshl_V,         InstEmit.Urshl_V,         typeof(OpCodeSimdReg));
+            SetA64("0111111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_S,         InstEmit.Urshr_S,         typeof(OpCodeSimdShImm));
+            SetA64("0x10111100>>>xxx001001xxxxxxxxxx", InstName.Urshr_V,         InstEmit.Urshr_V,         typeof(OpCodeSimdShImm));
+            SetA64("0110111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_V,         InstEmit.Urshr_V,         typeof(OpCodeSimdShImm));
+            SetA64("0111111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_S,         InstEmit.Ursra_S,         typeof(OpCodeSimdShImm));
+            SetA64("0x10111100>>>xxx001101xxxxxxxxxx", InstName.Ursra_V,         InstEmit.Ursra_V,         typeof(OpCodeSimdShImm));
+            SetA64("0110111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_V,         InstEmit.Ursra_V,         typeof(OpCodeSimdShImm));
+            SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", InstName.Ushl_V,          InstEmit.Ushl_V,          typeof(OpCodeSimdReg));
+            SetA64("0x10111100>>>xxx101001xxxxxxxxxx", InstName.Ushll_V,         InstEmit.Ushll_V,         typeof(OpCodeSimdShImm));
+            SetA64("0111111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_S,          InstEmit.Ushr_S,          typeof(OpCodeSimdShImm));
+            SetA64("0x10111100>>>xxx000001xxxxxxxxxx", InstName.Ushr_V,          InstEmit.Ushr_V,          typeof(OpCodeSimdShImm));
+            SetA64("0110111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_V,          InstEmit.Ushr_V,          typeof(OpCodeSimdShImm));
+            SetA64("01111110xx100000001110xxxxxxxxxx", InstName.Usqadd_S,        InstEmit.Usqadd_S,        typeof(OpCodeSimd));
+            SetA64("0>101110<<100000001110xxxxxxxxxx", InstName.Usqadd_V,        InstEmit.Usqadd_V,        typeof(OpCodeSimd));
+            SetA64("0111111101xxxxxx000101xxxxxxxxxx", InstName.Usra_S,          InstEmit.Usra_S,          typeof(OpCodeSimdShImm));
+            SetA64("0x10111100>>>xxx000101xxxxxxxxxx", InstName.Usra_V,          InstEmit.Usra_V,          typeof(OpCodeSimdShImm));
+            SetA64("0110111101xxxxxx000101xxxxxxxxxx", InstName.Usra_V,          InstEmit.Usra_V,          typeof(OpCodeSimdShImm));
+            SetA64("0x101110<<1xxxxx001000xxxxxxxxxx", InstName.Usubl_V,         InstEmit.Usubl_V,         typeof(OpCodeSimdReg));
+            SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", InstName.Usubw_V,         InstEmit.Usubw_V,         typeof(OpCodeSimdReg));
+            SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", InstName.Uzp1_V,          InstEmit.Uzp1_V,          typeof(OpCodeSimdReg));
+            SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", InstName.Uzp2_V,          InstEmit.Uzp2_V,          typeof(OpCodeSimdReg));
+            SetA64("0x001110<<100001001010xxxxxxxxxx", InstName.Xtn_V,           InstEmit.Xtn_V,           typeof(OpCodeSimd));
+            SetA64("0>001110<<0xxxxx001110xxxxxxxxxx", InstName.Zip1_V,          InstEmit.Zip1_V,          typeof(OpCodeSimdReg));
+            SetA64("0>001110<<0xxxxx011110xxxxxxxxxx", InstName.Zip2_V,          InstEmit.Zip2_V,          typeof(OpCodeSimdReg));
+#endregion
+
+#region "OpCode Table (AArch32)"
+            // Base
+            SetA32("<<<<0010100xxxxxxxxxxxxxxxxxxxxx", InstName.Add,   InstEmit32.Add,   typeof(OpCode32AluImm));
+            SetA32("<<<<0000100xxxxxxxxxxxxxxxx0xxxx", InstName.Add,   InstEmit32.Add,   typeof(OpCode32AluRsImm));
+            SetA32("<<<<1010xxxxxxxxxxxxxxxxxxxxxxxx", InstName.B,     InstEmit32.B,     typeof(OpCode32BImm));
+            SetA32("<<<<1011xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl,    InstEmit32.Bl,    typeof(OpCode32BImm));
+            SetA32("1111101xxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Blx,   InstEmit32.Blx,   typeof(OpCode32BImm));
+            SetA32("<<<<000100101111111111110001xxxx", InstName.Bx,    InstEmit32.Bx,    typeof(OpCode32BReg));
+            SetT32("xxxxxxxxxxxxxxxx010001110xxxx000", InstName.Bx,    InstEmit32.Bx,    typeof(OpCodeT16BReg));
+            SetA32("<<<<00110101xxxx0000xxxxxxxxxxxx", InstName.Cmp,   InstEmit32.Cmp,   typeof(OpCode32AluImm));
+            SetA32("<<<<00010101xxxx0000xxxxxxx0xxxx", InstName.Cmp,   InstEmit32.Cmp,   typeof(OpCode32AluRsImm));
+            SetA32("<<<<100xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldm,   InstEmit32.Ldm,   typeof(OpCode32MemMult));
+            SetA32("<<<<010xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldr,   InstEmit32.Ldr,   typeof(OpCode32MemImm));
+            SetA32("<<<<010xx1x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldrb,  InstEmit32.Ldrb,  typeof(OpCode32MemImm));
+            SetA32("<<<<000xx1x0xxxxxxxxxxxx1101xxxx", InstName.Ldrd,  InstEmit32.Ldrd,  typeof(OpCode32MemImm8));
+            SetA32("<<<<000xx1x1xxxxxxxxxxxx1011xxxx", InstName.Ldrh,  InstEmit32.Ldrh,  typeof(OpCode32MemImm8));
+            SetA32("<<<<000xx1x1xxxxxxxxxxxx1101xxxx", InstName.Ldrsb, InstEmit32.Ldrsb, typeof(OpCode32MemImm8));
+            SetA32("<<<<000xx1x1xxxxxxxxxxxx1111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, typeof(OpCode32MemImm8));
+            SetA32("<<<<0011101x0000xxxxxxxxxxxxxxxx", InstName.Mov,   InstEmit32.Mov,   typeof(OpCode32AluImm));
+            SetA32("<<<<0001101x0000xxxxxxxxxxx0xxxx", InstName.Mov,   InstEmit32.Mov,   typeof(OpCode32AluRsImm));
+            SetT32("xxxxxxxxxxxxxxxx00100xxxxxxxxxxx", InstName.Mov,   InstEmit32.Mov,   typeof(OpCodeT16AluImm8));
+            SetA32("<<<<100xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Stm,   InstEmit32.Stm,   typeof(OpCode32MemMult));
+            SetA32("<<<<010xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Str,   InstEmit32.Str,   typeof(OpCode32MemImm));
+            SetA32("<<<<010xx1x0xxxxxxxxxxxxxxxxxxxx", InstName.Strb,  InstEmit32.Strb,  typeof(OpCode32MemImm));
+            SetA32("<<<<000xx1x0xxxxxxxxxxxx1111xxxx", InstName.Strd,  InstEmit32.Strd,  typeof(OpCode32MemImm8));
+            SetA32("<<<<000xx1x0xxxxxxxxxxxx1011xxxx", InstName.Strh,  InstEmit32.Strh,  typeof(OpCode32MemImm8));
+            SetA32("<<<<0010010xxxxxxxxxxxxxxxxxxxxx", InstName.Sub,   InstEmit32.Sub,   typeof(OpCode32AluImm));
+            SetA32("<<<<0000010xxxxxxxxxxxxxxxx0xxxx", InstName.Sub,   InstEmit32.Sub,   typeof(OpCode32AluRsImm));
+#endregion
+
+            FillFastLookupTable(_instA32FastLookup, _allInstA32);
+            FillFastLookupTable(_instT32FastLookup, _allInstT32);
+            FillFastLookupTable(_instA64FastLookup, _allInstA64);
+        }
+
+        // Fills "table" with, for each fast lookup index, the instructions from "allInsts"
+        // (in list order) whose reduced mask/value pair accepts that index.
+        private static void FillFastLookupTable(InstInfo[][] table, List<InstInfo> allInsts)
+        {
+            List<InstInfo>[] buckets = new List<InstInfo>[FastLookupSize];
+            for (int slot = 0; slot < buckets.Length; slot++)
+            {
+                buckets[slot] = new List<InstInfo>();
+            }
+
+            foreach (InstInfo candidate in allInsts)
+            {
+                // Reduce the mask and the value to the bits kept by ToFastLookupIndex.
+                int reducedMask  = ToFastLookupIndex(candidate.Mask);
+                int reducedValue = ToFastLookupIndex(candidate.Value);
+
+                for (int slot = 0; slot < buckets.Length; slot++)
+                {
+                    if ((slot & reducedMask) != reducedValue) { continue; }
+                    buckets[slot].Add(candidate);
+                }
+            }
+
+            for (int slot = 0; slot < FastLookupSize; slot++)
+            {
+                table[slot] = buckets[slot].ToArray();
+            }
+        }
+
+        // Registers an encoding pattern for the AArch32 ARM mode, wrapping the name and
+        // the emitter into an InstDescriptor.
+        private static void SetA32(string encoding, InstName name, InstEmitter emitter, Type type)
+            => Set(encoding, ExecutionMode.Aarch32Arm, new InstDescriptor(name, emitter), type);
+
+        // Registers an encoding pattern for the AArch32 Thumb mode, wrapping the name and
+        // the emitter into an InstDescriptor.
+        private static void SetT32(string encoding, InstName name, InstEmitter emitter, Type type)
+            => Set(encoding, ExecutionMode.Aarch32Thumb, new InstDescriptor(name, emitter), type);
+
+        // Registers an encoding pattern for the AArch64 mode, wrapping the name and the
+        // emitter into an InstDescriptor.
+        private static void SetA64(string encoding, InstName name, InstEmitter emitter, Type type)
+            => Set(encoding, ExecutionMode.Aarch64, new InstDescriptor(name, emitter), type);
+
+        // Parses an encoding pattern (most significant bit first) and registers the matching
+        // mask/value entries in the instruction list of the given execution mode.
+        //   '0' / '1'  bit that must have exactly this value,
+        //   'x'        bit that is ignored when matching,
+        //   '<' / '>'  bit that is expanded into one entry per allowed value (see below).
+        // Any other character makes the method throw an ArgumentException.
+        private static void Set(string encoding, ExecutionMode mode, InstDescriptor inst, Type type)
+        {
+            int bit   = encoding.Length - 1;
+            int value = 0;
+            int xMask = 0;
+            int xBits = 0;
+
+            // Bit positions of the '<' and '>' characters, in the order they were read.
+            int[] xPos = new int[encoding.Length];
+
+            // Bits written as '<'; the expanded value equal to this one is not registered.
+            int blacklisted = 0;
+
+            for (int index = 0; index < encoding.Length; index++, bit--)
+            {
+                // Note: '<' and '>' mark bits that are expanded into one entry per value.
+                // With only '<' bits, every value except all ones is registered: "<<"
+                // gives 00, 01 and 10, but not 11. With only '>' bits, every value except
+                // zero is registered: ">>" gives 01, 10 and 11, but not 00.
+                // When both kinds appear in one encoding, only the single combination with
+                // every '<' bit set and every '>' bit clear is skipped.
+                char chr = encoding[index];
+
+                switch (chr)
+                {
+                    case '0': break;
+                    case '1': value |= 1 << bit; break;
+                    case 'x': xMask |= 1 << bit; break;
+                    case '>': xPos[xBits++] = bit; break;
+                    case '<':
+                        xPos[xBits++] = bit;
+                        blacklisted |= 1 << bit;
+                        break;
+                    default:
+                        throw new ArgumentException($"Invalid character '{chr}' in instruction encoding \"{encoding}\".", nameof(encoding));
+                }
+            }
+
+            // Up to here xMask has the 'x' bits set; invert it so it selects the compared bits.
+            xMask = ~xMask;
+
+            if (xBits == 0)
+            {
+                InsertInst(new InstInfo(xMask, value, inst, type), mode);
+                return;
+            }
+
+            // Register one entry for every combination of the '<' / '>' bits.
+            for (int index = 0; index < (1 << xBits); index++)
+            {
+                int mask = 0;
+
+                for (int x = 0; x < xBits; x++)
+                {
+                    mask |= ((index >> x) & 1) << xPos[x];
+                }
+
+                if (mask != blacklisted)
+                {
+                    InsertInst(new InstInfo(xMask, value | mask, inst, type), mode);
+                }
+            }
+        }
+
+        // Appends the entry to the list that collects the instructions of the given
+        // execution mode.
+        private static void InsertInst(InstInfo info, ExecutionMode mode)
+        {
+            if (mode == ExecutionMode.Aarch32Arm) { _allInstA32.Add(info); }
+            else if (mode == ExecutionMode.Aarch32Thumb) { _allInstT32.Add(info); }
+            else if (mode == ExecutionMode.Aarch64) { _allInstA64.Add(info); }
+            // Any other mode value leaves all three lists untouched.
+        }
+
+        // Finds the descriptor and opcode type for an A32 opcode, scanning only the
+        // candidates stored under the opcode's fast lookup index.
+        public static (InstDescriptor inst, Type type) GetInstA32(int opCode)
+            => GetInstFromList(_instA32FastLookup[ToFastLookupIndex(opCode)], opCode);
+
+        // Finds the descriptor and opcode type for a T32 opcode, scanning only the
+        // candidates stored under the opcode's fast lookup index.
+        public static (InstDescriptor inst, Type type) GetInstT32(int opCode)
+            => GetInstFromList(_instT32FastLookup[ToFastLookupIndex(opCode)], opCode);
+
+        // Finds the descriptor and opcode type for an A64 opcode, scanning only the
+        // candidates stored under the opcode's fast lookup index.
+        public static (InstDescriptor inst, Type type) GetInstA64(int opCode)
+            => GetInstFromList(_instA64FastLookup[ToFastLookupIndex(opCode)], opCode);
+
+        // Returns the first entry (in array order) whose masked opcode bits equal its value.
+        private static (InstDescriptor inst, Type type) GetInstFromList(InstInfo[] insts, int opCode)
+        {
+            for (int index = 0; index < insts.Length; index++)
+            {
+                if ((opCode & insts[index].Mask) == insts[index].Value)
+                {
+                    return (insts[index].Inst, insts[index].Type);
+                }
+            }
+            return (new InstDescriptor(InstName.Und, InstEmit.Und), typeof(OpCode));
+        }
+
+        // Packs opcode bits 22-29 into index bits 4-11 and opcode bits 10-13 into index
+        // bits 0-3, giving a 12-bit fast lookup index.
+        private static int ToFastLookupIndex(int value)
+            => ((value >> 18) & 0xFF0) | ((value >> 10) & 0x00F);
+    }
+}
diff --git a/ARMeilleure/Decoders/RegisterSize.cs b/ARMeilleure/Decoders/RegisterSize.cs
new file mode 100644
index 000000000..c9cea03ed
--- /dev/null
+++ b/ARMeilleure/Decoders/RegisterSize.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+    enum RegisterSize // Register size selector; members are numbered from zero in declaration order.
+    {
+        Int32   = 0,
+        Int64   = 1,
+        Simd64  = 2,
+        Simd128 = 3
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Decoders/ShiftType.cs b/ARMeilleure/Decoders/ShiftType.cs
new file mode 100644
index 000000000..8583f16ad
--- /dev/null
+++ b/ARMeilleure/Decoders/ShiftType.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.Decoders
+{
+    enum ShiftType // Kind of shift; NOTE(review): the explicit values presumably mirror the ARM 2-bit shift type field - confirm.
+    {
+        Lsl = 0, // presumably logical shift left
+        Lsr = 1, // presumably logical shift right
+        Asr = 2, // presumably arithmetic shift right
+        Ror = 3  // presumably rotate right
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Diagnostics/IRDumper.cs b/ARMeilleure/Diagnostics/IRDumper.cs
new file mode 100644
index 000000000..55d5b493e
--- /dev/null
+++ b/ARMeilleure/Diagnostics/IRDumper.cs
@@ -0,0 +1,168 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace ARMeilleure.Diagnostics
+{
+    // Builds a human-readable text dump of the IR held by a control flow graph.
+    static class IRDumper
+    {
+        private const string Indentation = " ";
+
+        // Returns the dump of every block in cfg.Blocks: a header line naming the block
+        // (plus its Next / Branch successors, when set) followed by one line per node.
+        public static string GetDump(ControlFlowGraph cfg)
+        {
+            StringBuilder sb = new StringBuilder();
+            Dictionary<Operand, string> localNames = new Dictionary<Operand, string>();
+            string indentation = string.Empty;
+
+            void IncreaseIndentation()
+            {
+                indentation += Indentation;
+            }
+
+            void DecreaseIndentation()
+            {
+                indentation = indentation.Substring(0, indentation.Length - Indentation.Length);
+            }
+
+            void AppendLine(string text)
+            {
+                sb.AppendLine(indentation + text);
+            }
+
+            IncreaseIndentation();
+
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                string blockName = GetBlockName(block);
+
+                if (block.Next != null)
+                {
+                    blockName += $" (next {GetBlockName(block.Next)})";
+                }
+
+                if (block.Branch != null)
+                {
+                    blockName += $" (branch {GetBlockName(block.Branch)})";
+                }
+
+                blockName += ":";
+                AppendLine(blockName);
+
+                IncreaseIndentation();
+
+                foreach (Node node in block.Operations)
+                {
+                    string[] sources = new string[node.SourcesCount];
+                    string instName = string.Empty;
+
+                    // Phi sources print as "(block: operand)" pairs, operation sources as operands.
+                    if (node is PhiNode phi)
+                    {
+                        for (int index = 0; index < sources.Length; index++)
+                        {
+                            string phiBlockName = GetBlockName(phi.GetBlock(index));
+                            string operName = GetOperandName(phi.GetSource(index), localNames);
+                            sources[index] = $"({phiBlockName}: {operName})";
+                        }
+
+                        instName = "Phi";
+                    }
+                    else if (node is Operation operation)
+                    {
+                        for (int index = 0; index < sources.Length; index++)
+                        {
+                            sources[index] = GetOperandName(operation.GetSource(index), localNames);
+                        }
+
+                        instName = operation.Instruction.ToString();
+                    }
+
+                    string allSources = string.Join(", ", sources);
+                    string line = instName + " " + allSources;
+
+                    if (node.Destination != null)
+                    {
+                        line = GetOperandName(node.Destination, localNames) + " = " + line;
+                    }
+
+                    AppendLine(line);
+                }
+
+                DecreaseIndentation();
+            }
+
+            return sb.ToString();
+        }
+
+        // Names a block after its index, e.g. "block3".
+        private static string GetBlockName(BasicBlock block)
+        {
+            return $"block{block.Index}";
+        }
+
+        // Formats an operand as "<type> <name>"; a null operand yields "<NULL>". Local variables
+        // are named "%0", "%1", ... in first-seen order, remembered through localNames.
+        private static string GetOperandName(Operand operand, Dictionary<Operand, string> localNames)
+        {
+            if (operand == null)
+            {
+                return "<NULL>";
+            }
+
+            string name = string.Empty;
+
+            if (operand.Kind == OperandKind.LocalVariable)
+            {
+                if (!localNames.TryGetValue(operand, out string localName))
+                {
+                    localName = "%" + localNames.Count;
+                    localNames.Add(operand, localName);
+                }
+
+                name = localName;
+            }
+            else if (operand.Kind == OperandKind.Register)
+            {
+                Register reg = operand.GetRegister();
+                switch (reg.Type)
+                {
+                    case RegisterType.Flag:    name = "b" + reg.Index; break;
+                    case RegisterType.Integer: name = "r" + reg.Index; break;
+                    case RegisterType.Vector:  name = "v" + reg.Index; break;
+                }
+            }
+            else if (operand.Kind == OperandKind.Constant)
+            {
+                name = "0x" + operand.Value.ToString("X");
+            }
+            else
+            {
+                // Invariant casing keeps the dump independent of the current culture.
+                name = operand.Kind.ToString().ToLowerInvariant();
+            }
+
+            return GetTypeName(operand.Type) + " " + name;
+        }
+
+        // Maps an operand type to its short dump name; throws ArgumentException otherwise.
+        private static string GetTypeName(OperandType type)
+        {
+            switch (type)
+            {
+                case OperandType.FP32: return "f32";
+                case OperandType.FP64: return "f64";
+                case OperandType.I32:  return "i32";
+                case OperandType.I64:  return "i64";
+                case OperandType.None: return "none";
+                case OperandType.V128: return "v128";
+            }
+
+            throw new ArgumentException($"Invalid operand type \"{type}\".");
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Diagnostics/Logger.cs b/ARMeilleure/Diagnostics/Logger.cs
new file mode 100644
index 000000000..29d9c79b9
--- /dev/null
+++ b/ARMeilleure/Diagnostics/Logger.cs
@@ -0,0 +1,59 @@
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.Diagnostics
+{
+    // Console logger for pass timing and IR dumps. The bodies of StartPass and EndPass
+    // are compiled only when the M_DEBUG symbol is defined.
+    static class Logger
+    {
+        // Stopwatch timestamp stored by the last StartPass call.
+        private static long _startTime;
+
+        // Stopwatch ticks accumulated per pass, indexed by the PassName value.
+        private static readonly long[] _accumulatedTime = new long[(int)PassName.Count];
+
+        // Announces the pass and stores the timestamp it started at.
+        public static void StartPass(PassName name)
+        {
+#if M_DEBUG
+            WriteOutput(name + " pass started...");
+
+            _startTime = Stopwatch.GetTimestamp();
+#endif
+        }
+
+        // Ends the pass like EndPass(name) and then prints the IR dump of cfg.
+        public static void EndPass(PassName name, ControlFlowGraph cfg)
+        {
+#if M_DEBUG
+            EndPass(name);
+
+            WriteOutput("IR after " + name + " pass:");
+            WriteOutput(IRDumper.GetDump(cfg));
+#endif
+        }
+
+        // Adds the ticks elapsed since the last StartPass call to the pass total and
+        // prints that accumulated total in milliseconds.
+        public static void EndPass(PassName name)
+        {
+#if M_DEBUG
+            int passIndex = (int)name;
+            _accumulatedTime[passIndex] += Stopwatch.GetTimestamp() - _startTime;
+            WriteOutput($"{name} pass ended after {GetMilliseconds(_accumulatedTime[passIndex])} ms...");
+#endif
+        }
+
+        // Converts Stopwatch ticks to whole milliseconds, truncating the fraction.
+        private static long GetMilliseconds(long ticks)
+        {
+            double seconds = (double)ticks / Stopwatch.Frequency;
+            return (long)(seconds * 1000);
+        }
+
+        // Single output point of the logger: writes the text to the console.
+        private static void WriteOutput(string text) => Console.WriteLine(text);
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Diagnostics/PassName.cs b/ARMeilleure/Diagnostics/PassName.cs
new file mode 100644
index 000000000..e37439855
--- /dev/null
+++ b/ARMeilleure/Diagnostics/PassName.cs
@@ -0,0 +1,17 @@
+namespace ARMeilleure.Diagnostics
+{
+    enum PassName // Identifies a pass for Logger; members are numbered from zero in declaration order.
+    {
+        Decoding,
+        Translation,
+        RegisterUsage,
+        Dominance,
+        SsaConstruction,
+        Optimization,
+        PreAllocation,
+        RegisterAllocation,
+        CodeGeneration,
+        // Not a pass: equals the number of passes above, so it must stay the last member.
+        Count
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/CryptoHelper.cs b/ARMeilleure/Instructions/CryptoHelper.cs
new file mode 100644
index 000000000..b6b4a62d3
--- /dev/null
+++ b/ARMeilleure/Instructions/CryptoHelper.cs
@@ -0,0 +1,279 @@
+// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
+
+using ARMeilleure.State;
+
+namespace ARMeilleure.Instructions
+{
+    static class CryptoHelper
+    {
+#region "LookUp Tables"
+        private static readonly byte[] _sBox = new byte[]
+        {
+            0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+            0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+            0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+            0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+            0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+            0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+            0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+            0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+            0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+            0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+            0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+            0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+            0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+            0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+            0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+            0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+        };
+
+        private static readonly byte[] _invSBox = new byte[]
+        {
+            0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+            0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+            0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+            0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+            0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+            0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+            0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+            0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+            0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+            0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+            0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+            0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+            0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+            0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+            0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+            0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+        };
+
+        private static readonly byte[] _gfMul02 = new byte[]
+        {
+            0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
+            0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
+            0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
+            0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
+            0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
+            0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
+            0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
+            0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
+            0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
+            0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25,
+            0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45,
+            0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
+            0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85,
+            0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5,
+            0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
+            0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5
+        };
+
+        private static readonly byte[] _gfMul03 = new byte[]
+        {
+            0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
+            0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
+            0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
+            0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,
+            0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1,
+            0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
+            0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
+            0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,
+            0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
+            0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba,
+            0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea,
+            0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
+            0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a,
+            0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a,
+            0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
+            0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a
+        };
+
+        private static readonly byte[] _gfMul09 = new byte[]
+        {
+            0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
+            0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
+            0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
+            0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc,
+            0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01,
+            0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
+            0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a,
+            0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa,
+            0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
+            0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b,
+            0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,
+            0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
+            0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed,
+            0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d,
+            0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
+            0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46
+        };
+
+        private static readonly byte[] _gfMul0B = new byte[]
+        {
+            0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
+            0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
+            0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
+            0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2,
+            0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f,
+            0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
+            0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4,
+            0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54,
+            0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
+            0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e,
+            0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5,
+            0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
+            0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,
+            0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,
+            0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
+            0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3
+        };
+
+        private static readonly byte[] _gfMul0D = new byte[]
+        {
+            0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
+            0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
+            0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
+            0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20,
+            0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,
+            0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
+            0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d,
+            0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d,
+            0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
+            0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,
+            0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a,
+            0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
+            0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,
+            0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,
+            0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
+            0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97
+        };
+
+        private static readonly byte[] _gfMul0E = new byte[]
+        {
+            0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
+            0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
+            0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
+            0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61,
+            0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,
+            0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
+            0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c,
+            0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc,
+            0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
+            0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb,
+            0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,
+            0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
+            0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6,
+            0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56,
+            0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
+            0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d
+        };
+
+        private static readonly byte[] _srPerm = new byte[] // AesShiftRows: entry i is the destination index of input byte i.
+        {
+            0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
+        };
+
+        private static readonly byte[] _isrPerm = new byte[] // AesInvShiftRows: entry i is the destination index of input byte i.
+        {
+            0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
+        };
+#endregion
+
+        public static V128 AesInvMixColumns(V128 op)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+
+            // Process the state in 4-byte groups; "b" is the offset of the first byte of the group.
+            for (int b = 0; b < 16; b += 4)
+            {
+                byte s0 = src[b];
+                byte s1 = src[b + 1];
+                byte s2 = src[b + 2];
+                byte s3 = src[b + 3];
+
+                // Each output byte XORs the four inputs after scaling them through the GF lookup
+                // tables; the coefficient row 0E 0B 0D 09 is rotated one place per output byte.
+                dst[b]     = (byte)(_gfMul0E[s0] ^ _gfMul0B[s1] ^ _gfMul0D[s2] ^ _gfMul09[s3]);
+                dst[b + 1] = (byte)(_gfMul09[s0] ^ _gfMul0E[s1] ^ _gfMul0B[s2] ^ _gfMul0D[s3]);
+                dst[b + 2] = (byte)(_gfMul0D[s0] ^ _gfMul09[s1] ^ _gfMul0E[s2] ^ _gfMul0B[s3]);
+                dst[b + 3] = (byte)(_gfMul0B[s0] ^ _gfMul0D[s1] ^ _gfMul09[s2] ^ _gfMul0E[s3]);
+            }
+            return new V128(dst);
+        }
+
+        public static V128 AesInvShiftRows(V128 op)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+            // Scatter: input byte i is written to position _isrPerm[i] of the result.
+            for (int i = 0; i < 16; i++)
+            {
+                dst[_isrPerm[i]] = src[i];
+            }
+
+            return new V128(dst);
+        }
+
+        public static V128 AesInvSubBytes(V128 op)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+            // Byte-wise substitution: each input byte is used as an index into the inverse S-box.
+            for (int i = 0; i < 16; i++)
+            {
+                dst[i] = _invSBox[src[i]];
+            }
+
+            return new V128(dst);
+        }
+
+        public static V128 AesMixColumns(V128 op)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+
+            // Process the state in 4-byte groups; "b" is the offset of the first byte of the group.
+            for (int b = 0; b < 16; b += 4)
+            {
+                byte s0 = src[b];
+                byte s1 = src[b + 1];
+                byte s2 = src[b + 2];
+                byte s3 = src[b + 3];
+
+                // Coefficient row 02 03 01 01, rotated one place per output byte; only the x2 and x3
+                // products go through a table, the x1 terms are the input bytes themselves.
+                dst[b]     = (byte)(_gfMul02[s0] ^ _gfMul03[s1] ^ s2 ^ s3);
+                dst[b + 1] = (byte)(s0 ^ _gfMul02[s1] ^ _gfMul03[s2] ^ s3);
+                dst[b + 2] = (byte)(s0 ^ s1 ^ _gfMul02[s2] ^ _gfMul03[s3]);
+                dst[b + 3] = (byte)(_gfMul03[s0] ^ s1 ^ s2 ^ _gfMul02[s3]);
+            }
+            return new V128(dst);
+        }
+
+        public static V128 AesShiftRows(V128 op)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+            // Scatter: input byte i is written to position _srPerm[i] of the result.
+            for (int i = 0; i < 16; i++)
+            {
+                dst[_srPerm[i]] = src[i];
+            }
+
+            return new V128(dst);
+        }
+
+        public static V128 AesSubBytes(V128 op)
+        {
+            byte[] src = op.ToArray();
+            byte[] dst = new byte[16];
+            // Byte-wise substitution: each input byte is used as an index into the forward S-box.
+            for (int i = 0; i < 16; i++)
+            {
+                dst[i] = _sBox[src[i]];
+            }
+
+            return new V128(dst);
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/DelegateTypes.cs b/ARMeilleure/Instructions/DelegateTypes.cs
new file mode 100644
index 000000000..e90e4d77a
--- /dev/null
+++ b/ARMeilleure/Instructions/DelegateTypes.cs
@@ -0,0 +1,78 @@
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+    delegate double _F64_F64(double a1); // Naming scheme used throughout: _<Return>_<Arg1>_<Arg2>...; this group returns double.
+    delegate double _F64_F64_F64(double a1, double a2);
+    delegate double _F64_F64_F64_F64(double a1, double a2, double a3);
+    delegate double _F64_F64_MidpointRounding(double a1, MidpointRounding a2);
+    // Delegates returning float.
+    delegate float _F32_F32(float a1);
+    delegate float _F32_F32_F32(float a1, float a2);
+    delegate float _F32_F32_F32_F32(float a1, float a2, float a3);
+    delegate float _F32_F32_MidpointRounding(float a1, MidpointRounding a2);
+    delegate float _F32_U16(ushort a1);
+    // Delegates returning int (S32).
+    delegate int _S32_F32(float a1);
+    delegate int _S32_F32_F32_Bool(float a1, float a2, bool a3);
+    delegate int _S32_F64(double a1);
+    delegate int _S32_F64_F64_Bool(double a1, double a2, bool a3);
+    delegate int _S32_U64_U16(ulong a1, ushort a2);
+    delegate int _S32_U64_U32(ulong a1, uint a2);
+    delegate int _S32_U64_U64(ulong a1, ulong a2);
+    delegate int _S32_U64_U8(ulong a1, byte a2);
+    delegate int _S32_U64_V128(ulong a1, V128 a2);
+    // Delegates returning long (S64).
+    delegate long _S64_F32(float a1);
+    delegate long _S64_F64(double a1);
+    delegate long _S64_S64(long a1);
+    delegate long _S64_S64_S32(long a1, int a2);
+    delegate long _S64_S64_S64(long a1, long a2);
+    delegate long _S64_S64_S64_Bool_S32(long a1, long a2, bool a3, int a4);
+    delegate long _S64_S64_S64_S32(long a1, long a2, int a3);
+    delegate long _S64_U64_S32(ulong a1, int a2);
+    delegate long _S64_U64_S64(ulong a1, long a2);
+    // Delegates returning ushort (U16).
+    delegate ushort _U16_F32(float a1);
+    delegate ushort _U16_U64(ulong a1);
+    // Delegates returning uint (U32).
+    delegate uint _U32_F32(float a1);
+    delegate uint _U32_F64(double a1);
+    delegate uint _U32_U32(uint a1);
+    delegate uint _U32_U32_U16(uint a1, ushort a2);
+    delegate uint _U32_U32_U32(uint a1, uint a2);
+    delegate uint _U32_U32_U64(uint a1, ulong a2);
+    delegate uint _U32_U32_U8(uint a1, byte a2);
+    delegate uint _U32_U64(ulong a1);
+    // Delegates returning ulong (U64).
+    delegate ulong _U64();
+    delegate ulong _U64_F32(float a1);
+    delegate ulong _U64_F64(double a1);
+    delegate ulong _U64_S64_S32(long a1, int a2);
+    delegate ulong _U64_S64_U64(long a1, ulong a2);
+    delegate ulong _U64_U64(ulong a1);
+    delegate ulong _U64_U64_S32(ulong a1, int a2);
+    delegate ulong _U64_U64_S64_S32(ulong a1, long a2, int a3);
+    delegate ulong _U64_U64_U64(ulong a1, ulong a2);
+    delegate ulong _U64_U64_U64_Bool_S32(ulong a1, ulong a2, bool a3, int a4);
+    // Delegate returning byte (U8).
+    delegate byte _U8_U64(ulong a1);
+    // Delegates returning V128.
+    delegate V128 _V128_U64(ulong a1);
+    delegate V128 _V128_V128(V128 a1);
+    delegate V128 _V128_V128_U32_V128(V128 a1, uint a2, V128 a3);
+    delegate V128 _V128_V128_V128(V128 a1, V128 a2);
+    delegate V128 _V128_V128_V128_V128(V128 a1, V128 a2, V128 a3);
+    delegate V128 _V128_V128_V128_V128_V128(V128 a1, V128 a2, V128 a3, V128 a4);
+    delegate V128 _V128_V128_V128_V128_V128_V128(V128 a1, V128 a2, V128 a3, V128 a4, V128 a5);
+    // Delegates returning nothing.
+    delegate void _Void();
+    delegate void _Void_U64(ulong a1);
+    delegate void _Void_U64_S32(ulong a1, int a2);
+    delegate void _Void_U64_U16(ulong a1, ushort a2);
+    delegate void _Void_U64_U32(ulong a1, uint a2);
+    delegate void _Void_U64_U64(ulong a1, ulong a2);
+    delegate void _Void_U64_U8(ulong a1, byte a2);
+    delegate void _Void_U64_V128(ulong a1, V128 a2);
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitAlu.cs b/ARMeilleure/Instructions/InstEmitAlu.cs
new file mode 100644
index 000000000..947c9f70b
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitAlu.cs
@@ -0,0 +1,369 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        public static void Adc(ArmEmitterContext context)  => EmitAdc(context, setFlags: false);
+        public static void Adcs(ArmEmitterContext context) => EmitAdc(context, setFlags: true);
+        // Shared emitter for the two entry points above; setFlags selects the flag-updating form.
+        private static void EmitAdc(ArmEmitterContext context, bool setFlags)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            // Add the two source operands first, then fold the carry flag in with a second add.
+            Operand sum = context.Add(lhs, rhs);
+            Operand carryIn = GetFlag(PState.CFlag);
+
+            // The flag operand is widened with ZeroExtend32 when the opcode works on 64-bit registers.
+            bool is64Bits = context.CurrOp.RegisterSize == RegisterSize.Int64;
+            if (is64Bits)
+            {
+                carryIn = context.ZeroExtend32(OperandType.I64, carryIn);
+            }
+            sum = context.Add(sum, carryIn);
+
+            if (setFlags)
+            {
+                // N/Z are checked on the result; the C and V checks also receive the source operands.
+                EmitNZFlagsCheck(context, sum);
+                EmitAdcsCCheck(context, lhs, sum);
+                EmitAddsVCheck(context, lhs, rhs, sum);
+            }
+            SetAluDOrZR(context, sum);
+        }
+
+        // Emits N + M without touching the flags. The result is stored through SetAluD, i.e. with
+        // x31IsZR: false, so zero-register handling depends on the opcode form.
+        public static void Add(ArmEmitterContext context) =>
+            SetAluD(context, context.Add(GetAluN(context), GetAluM(context)));
+
+        public static void Adds(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            // The operand pair is registered with the context (MarkComparison) before the add is emitted.
+            context.MarkComparison(lhs, rhs);
+            Operand sum = context.Add(lhs, rhs);
+
+            // Flag checks are emitted in the order N/Z, C, V.
+            EmitNZFlagsCheck(context, sum);
+            EmitAddsCCheck(context, lhs, sum);
+            EmitAddsVCheck(context, lhs, rhs, sum);
+
+            SetAluDOrZR(context, sum);
+        }
+
+        // Emits N & M without touching the flags. The result is stored through SetAluD, i.e. with
+        // x31IsZR: false, so zero-register handling depends on the opcode form.
+        public static void And(ArmEmitterContext context) =>
+            SetAluD(context, context.BitwiseAnd(GetAluN(context), GetAluM(context)));
+
+        public static void Ands(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+            Operand masked = context.BitwiseAnd(lhs, rhs);
+
+            // Logical operation: N/Z follow the result, while C and V are set to zero.
+            EmitNZFlagsCheck(context, masked);
+            EmitCVFlagsClear(context);
+
+            SetAluDOrZR(context, masked);
+        }
+
+        // Signed (arithmetic) right shift of N by a register-supplied amount; GetAluMShift
+        // masks that amount to the register width before it is used.
+        public static void Asrv(ArmEmitterContext context) =>
+            SetAluDOrZR(context, context.ShiftRightSI(GetAluN(context), GetAluMShift(context)));
+
+        public static void Bic(ArmEmitterContext context)  => EmitBic(context, setFlags: false);
+        public static void Bics(ArmEmitterContext context) => EmitBic(context, setFlags: true);
+        // Shared emitter for the two entry points above; setFlags selects the flag-updating form.
+        private static void EmitBic(ArmEmitterContext context, bool setFlags)
+        {
+            Operand value = GetAluN(context);
+            Operand clearMask = GetAluM(context);
+            // Bit clear = value AND (NOT mask): every bit set in the M operand is cleared in the result.
+            Operand invMask = context.BitwiseNot(clearMask);
+            Operand res = context.BitwiseAnd(value, invMask);
+            if (setFlags)
+            {
+                EmitNZFlagsCheck(context, res);
+                EmitCVFlagsClear(context);
+            }
+            // setFlags doubles as the x31IsZR argument: the flag-setting form always uses ZR semantics.
+            SetAluD(context, res, setFlags);
+        }
+
+        public static void Cls(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+            Operand value = GetIntOrZR(context, op.Rn);
+
+            // Compare every bit with its upper neighbour by XOR-ing (value >> 1) with value-minus-top-bit.
+            // The leading-zero count of that XOR, minus one, is the number of bits after the top bit equal to it.
+            Operand shifted = context.ShiftRightUI(value, Const(1));
+
+            // The mask drops the most significant bit for the active register width.
+            Operand signlessMask = op.RegisterSize == RegisterSize.Int32
+                ? Const(int.MaxValue)
+                : Const(long.MaxValue);
+            Operand signless = context.BitwiseAnd(value, signlessMask);
+
+            Operand diff = context.BitwiseExclusiveOr(shifted, signless);
+            Operand count = context.CountLeadingZeros(diff);
+            count = context.Subtract(count, Const(count.Type, 1));
+            SetAluDOrZR(context, count);
+        }
+
+        public static void Clz(ArmEmitterContext context)
+        {
+            // Stores the leading-zero count of Rn (read through GetIntOrZR) into the
+            // destination, using ZR semantics for the write.
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+            Operand source = GetIntOrZR(context, op.Rn);
+
+            SetAluDOrZR(context, context.CountLeadingZeros(source));
+        }
+
+        public static void Eon(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            // Exclusive OR of N with the bitwise complement of M.
+            Operand inverted = context.BitwiseNot(rhs);
+            SetAluD(context, context.BitwiseExclusiveOr(lhs, inverted));
+        }
+
+        // Emits N ^ M without touching the flags. The result is stored through SetAluD, i.e. with
+        // x31IsZR: false, so zero-register handling depends on the opcode form.
+        public static void Eor(ArmEmitterContext context) =>
+            SetAluD(context, context.BitwiseExclusiveOr(GetAluN(context), GetAluM(context)));
+
+        public static void Extr(ArmEmitterContext context)
+        {
+            OpCodeAluRs op = (OpCodeAluRs)context.CurrOp;
+
+            // Rm supplies the low part of the result (shifted right by op.Shift); Rn supplies the high part.
+            Operand low = GetIntOrZR(context, op.Rm);
+            Operand res;
+            if (op.Shift == 0)
+            {
+                // Zero shift: the result is Rm exactly as read.
+                res = low;
+            }
+            else if (op.Rn == op.Rm)
+            {
+                // Same register on both sides, so the extract collapses to a rotate right.
+                res = context.RotateRight(low, Const(op.Shift));
+            }
+            else
+            {
+                low = context.ShiftRightUI(low, Const(op.Shift));
+                Operand high = context.ShiftLeft(GetIntOrZR(context, op.Rn), Const(op.GetBitsCount() - op.Shift));
+                res = context.BitwiseOr(low, high);
+            }
+
+            SetAluDOrZR(context, res);
+        }
+
+        // Left shift of N by a register-supplied amount; GetAluMShift masks that
+        // amount to the register width before it is used.
+        public static void Lslv(ArmEmitterContext context) =>
+            SetAluDOrZR(context, context.ShiftLeft(GetAluN(context), GetAluMShift(context)));
+
+        // Unsigned (logical) right shift of N by a register-supplied amount; GetAluMShift
+        // masks that amount to the register width before it is used.
+        public static void Lsrv(ArmEmitterContext context) =>
+            SetAluDOrZR(context, context.ShiftRightUI(GetAluN(context), GetAluMShift(context)));
+
+        public static void Sbc(ArmEmitterContext context)  => EmitSbc(context, setFlags: false);
+        public static void Sbcs(ArmEmitterContext context) => EmitSbc(context, setFlags: true);
+        // Shared emitter for the two entry points above; setFlags selects the flag-updating form.
+        private static void EmitSbc(ArmEmitterContext context, bool setFlags)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            // Subtract the operands first, then take the borrow away with a second subtraction.
+            Operand diff = context.Subtract(lhs, rhs);
+
+            // The borrow is the inverted carry flag (C ^ 1), widened when the opcode works on 64-bit registers.
+            Operand borrowIn = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1));
+            bool is64Bits = context.CurrOp.RegisterSize == RegisterSize.Int64;
+            if (is64Bits)
+            {
+                borrowIn = context.ZeroExtend32(OperandType.I64, borrowIn);
+            }
+            diff = context.Subtract(diff, borrowIn);
+
+            if (setFlags)
+            {
+                // N/Z are checked on the result; C receives the two operands; V receives operands and result.
+                EmitNZFlagsCheck(context, diff);
+                EmitSbcsCCheck(context, lhs, rhs);
+                EmitSubsVCheck(context, lhs, rhs, diff);
+            }
+            SetAluDOrZR(context, diff);
+        }
+
+        // Emits N - M without touching the flags. The result is stored through SetAluD, i.e. with
+        // x31IsZR: false, so zero-register handling depends on the opcode form.
+        public static void Sub(ArmEmitterContext context) =>
+            SetAluD(context, context.Subtract(GetAluN(context), GetAluM(context)));
+
+        public static void Subs(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            // The operand pair is registered with the context (MarkComparison) before the subtract is emitted.
+            context.MarkComparison(lhs, rhs);
+            Operand diff = context.Subtract(lhs, rhs);
+
+            // Flag checks are emitted in the order N/Z, C, V; the carry check takes the two operands.
+            EmitNZFlagsCheck(context, diff);
+            EmitSubsCCheck(context, lhs, rhs);
+            EmitSubsVCheck(context, lhs, rhs, diff);
+
+            SetAluDOrZR(context, diff);
+        }
+
+        public static void Orn(ArmEmitterContext context)
+        {
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            // Inclusive OR of N with the bitwise complement of M.
+            Operand inverted = context.BitwiseNot(rhs);
+            SetAluD(context, context.BitwiseOr(lhs, inverted));
+        }
+
+        // Emits N | M without touching the flags. The result is stored through SetAluD, i.e. with
+        // x31IsZR: false, so zero-register handling depends on the opcode form.
+        public static void Orr(ArmEmitterContext context) =>
+            SetAluD(context, context.BitwiseOr(GetAluN(context), GetAluM(context)));
+
+        public static void Rbit(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+            Operand source = GetIntOrZR(context, op.Rn);
+
+            // Bit reversal is not emitted inline: a SoftFallback routine is called through
+            // the delegate type that matches the operand width.
+            Operand reversed;
+            if (op.RegisterSize != RegisterSize.Int32)
+            {
+                reversed = context.Call(new _U64_U64(SoftFallback.ReverseBits64), source);
+            }
+            else
+            {
+                reversed = context.Call(new _U32_U32(SoftFallback.ReverseBits32), source);
+            }
+            SetAluDOrZR(context, reversed);
+        }
+
+        public static void Rev16(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+            Operand source = GetIntOrZR(context, op.Rn);
+
+            // The byte reversal is not emitted inline: a SoftFallback routine is called through
+            // the delegate type that matches the operand width.
+            Operand swapped;
+            if (op.RegisterSize != RegisterSize.Int32)
+            {
+                swapped = context.Call(new _U64_U64(SoftFallback.ReverseBytes16_64), source);
+            }
+            else
+            {
+                swapped = context.Call(new _U32_U32(SoftFallback.ReverseBytes16_32), source);
+            }
+            SetAluDOrZR(context, swapped);
+        }
+
+        public static void Rev32(ArmEmitterContext context)
+        {
+            OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+            Operand source = GetIntOrZR(context, op.Rn);
+
+            // 32-bit registers use the ByteSwap IR operation; otherwise SoftFallback.ReverseBytes32_64 is called.
+            Operand swapped;
+            if (op.RegisterSize == RegisterSize.Int32)
+            {
+                swapped = context.ByteSwap(source);
+            }
+            else
+            {
+                swapped = context.Call(new _U64_U64(SoftFallback.ReverseBytes32_64), source);
+            }
+            SetAluDOrZR(context, swapped);
+        }
+
+        public static void Rev64(ArmEmitterContext context)
+        {
+            // Byte-swaps Rn with the ByteSwap IR operation and stores the result with ZR semantics.
+            Operand source = GetIntOrZR(context, ((OpCodeAlu)context.CurrOp).Rn);
+            SetAluDOrZR(context, context.ByteSwap(source));
+        }
+
+        // Rotate right of N by a register-supplied amount; GetAluMShift masks that
+        // amount to the register width before it is used.
+        public static void Rorv(ArmEmitterContext context) =>
+            SetAluDOrZR(context, context.RotateRight(GetAluN(context), GetAluMShift(context)));
+
+        private static Operand GetAluMShift(ArmEmitterContext context)
+        {
+            IOpCodeAluRs op = (IOpCodeAluRs)context.CurrOp;
+            Operand amount = GetIntOrZR(context, op.Rm);
+
+            // The shift amount is handled as a 32-bit value, so a 64-bit Rm is narrowed first.
+            if (op.RegisterSize == RegisterSize.Int64)
+            {
+                amount = context.ConvertI64ToI32(amount);
+            }
+            // Mask with (bit count - 1) so the amount always selects a position inside the register.
+            return context.BitwiseAnd(amount, Const(context.CurrOp.GetBitsCount() - 1));
+        }
+
+        private static void EmitCVFlagsClear(ArmEmitterContext context) // Sets both the C and V flags to constant 0.
+        {
+            SetFlag(context, PState.CFlag, Const(0)); // Carry flag first...
+            SetFlag(context, PState.VFlag, Const(0)); // ...then overflow flag.
+        }
+
+        // Stores d into the destination register without requesting zero-register semantics;
+        // the three-argument overload may still apply them for IOpCodeAluRs opcodes.
+        public static void SetAluD(ArmEmitterContext context, Operand d) =>
+            SetAluD(context, d, x31IsZR: false);
+
+        // Stores d into the destination register with zero-register semantics requested:
+        // a destination equal to RegisterConsts.ZeroIndex is not written.
+        public static void SetAluDOrZR(ArmEmitterContext context, Operand d) =>
+            SetAluD(context, d, x31IsZR: true);
+
+        public static void SetAluD(ArmEmitterContext context, Operand d, bool x31IsZR)
+        {
+            IOpCodeAlu op = (IOpCodeAlu)context.CurrOp;
+
+            // A write to the zero-register index is dropped when the caller asks for ZR semantics or the opcode is an IOpCodeAluRs.
+            bool discard = (x31IsZR || op is IOpCodeAluRs) && op.Rd == RegisterConsts.ZeroIndex;
+            if (!discard)
+            {
+                SetIntOrSP(context, op.Rd, d);
+            }
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs
new file mode 100644
index 000000000..79b0abbc3
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitAlu32.cs
@@ -0,0 +1,129 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit32
+    {
+        // ADD/ADDS: Rd = Rn + M; N, Z, C and V are updated only when the opcode sets flags.
+        public static void Add(ArmEmitterContext context)
+        {
+            IOpCode32Alu aluOp = (IOpCode32Alu)context.CurrOp;
+
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context, setCarry: false);
+            Operand sum = context.Add(lhs, rhs);
+
+            if (aluOp.SetFlags)
+            {
+                EmitNZFlagsCheck(context, sum);
+                EmitAddsCCheck(context, lhs, sum);
+                EmitAddsVCheck(context, lhs, rhs, sum);
+            }
+
+            EmitAluStore(context, sum);
+        }
+
+        // CMP: evaluates Rn - M only to update N, Z, C and V; the difference is not stored.
+        public static void Cmp(ArmEmitterContext context)
+        {
+            IOpCode32Alu aluOp = (IOpCode32Alu)context.CurrOp;
+
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context, setCarry: false);
+            Operand diff = context.Subtract(lhs, rhs);
+
+            EmitNZFlagsCheck(context, diff);
+            EmitSubsCCheck(context, lhs, diff);
+            EmitSubsVCheck(context, lhs, rhs, diff);
+        }
+
+        // MOV/MOVS: writes M to Rd; N and Z are updated only when the opcode sets flags.
+        public static void Mov(ArmEmitterContext context)
+        {
+            IOpCode32Alu aluOp = (IOpCode32Alu)context.CurrOp;
+
+            Operand source = GetAluM(context);
+
+            if (aluOp.SetFlags)
+            {
+                EmitNZFlagsCheck(context, source);
+            }
+
+            EmitAluStore(context, source);
+        }
+
+        // SUB/SUBS: Rd = Rn - M; N, Z, C and V are updated only when the opcode sets flags.
+        public static void Sub(ArmEmitterContext context)
+        {
+            IOpCode32Alu aluOp = (IOpCode32Alu)context.CurrOp;
+
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context, setCarry: false);
+            Operand diff = context.Subtract(lhs, rhs);
+
+            if (aluOp.SetFlags)
+            {
+                EmitNZFlagsCheck(context, diff);
+                EmitSubsCCheck(context, lhs, diff);
+                EmitSubsVCheck(context, lhs, rhs, diff);
+            }
+
+            EmitAluStore(context, diff);
+        }
+
+        // Stores an ALU result in Rd. When Rd is the PC alias the value is used as a branch
+        // target instead: flag-setting opcodes emit a Return, the others use EmitAluWritePc.
+        private static void EmitAluStore(ArmEmitterContext context, Operand value)
+        {
+            IOpCode32Alu aluOp = (IOpCode32Alu)context.CurrOp;
+
+            if (aluOp.Rd != RegisterAlias.Aarch32Pc)
+            {
+                SetIntA32(context, aluOp.Rd, value);
+                return;
+            }
+
+            if (!aluOp.SetFlags)
+            {
+                EmitAluWritePc(context, value);
+                return;
+            }
+
+            // TODO: Load SPSR etc.
+            Operand thumbFlag = GetFlag(PState.TFlag);
+            Operand thumbLabel = Label();
+            context.BranchIfTrue(thumbLabel, thumbFlag);
+
+            // T flag clear: return the value with its low two bits cleared.
+            Operand armTarget = context.BitwiseAnd(value, Const(~3));
+            context.Return(context.ZeroExtend32(OperandType.I64, armTarget));
+
+            context.MarkLabel(thumbLabel);
+            // T flag set: return the value with only bit 0 cleared.
+            Operand thumbTarget = context.BitwiseAnd(value, Const(~1));
+            context.Return(context.ZeroExtend32(OperandType.I64, thumbTarget));
+        }
+
+        // Writes value to the PC after storing state to the context: Thumb opcodes return it
+        // with bit 0 cleared, all other opcodes go through EmitBxWritePc.
+        private static void EmitAluWritePc(ArmEmitterContext context, Operand value)
+        {
+            context.StoreToContext();
+
+            if (!IsThumb(context.CurrOp))
+            {
+                EmitBxWritePc(context, value);
+                return;
+            }
+
+            context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1))));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitAluHelper.cs b/ARMeilleure/Instructions/InstEmitAluHelper.cs
new file mode 100644
index 000000000..81d5c9eb3
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitAluHelper.cs
@@ -0,0 +1,351 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static class InstEmitAluHelper
+    {
+        // Sets N when d is negative and Z when d is zero, comparing against a zero of d's type.
+        public static void EmitNZFlagsCheck(ArmEmitterContext context, Operand d)
+        {
+            SetFlag(context, PState.NFlag, context.ICompareLess (d, Const(d.Type, 0)));
+            SetFlag(context, PState.ZFlag, context.ICompareEqual(d, Const(d.Type, 0)));
+        }
+
+        // Updates C after an add with carry-in; n is the first operand and d the result.
+        public static void EmitAdcsCCheck(ArmEmitterContext context, Operand n, Operand d)
+        {
+            // C = (Rd == Rn && CIn) || Rd < Rn
+            Operand cIn = GetFlag(PState.CFlag);
+
+            Operand cOut = context.BitwiseAnd(context.ICompareEqual(d, n), cIn);
+            cOut = context.BitwiseOr(cOut, context.ICompareLessUI(d, n));
+
+            SetFlag(context, PState.CFlag, cOut);
+        }
+
+        // Updates C after a plain add (n is the first operand, d the result): C = Rd < Rn.
+        public static void EmitAddsCCheck(ArmEmitterContext context, Operand n, Operand d)
+        {
+            SetFlag(context, PState.CFlag, context.ICompareLessUI(d, n));
+        }
+
+        // Updates V after an add: set when n and m have the same sign and d's sign differs.
+        public static void EmitAddsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d)
+        {
+            // V = (Rd ^ Rn) & ~(Rn ^ Rm) < 0
+            Operand vOut = context.BitwiseExclusiveOr(d, n);
+
+            vOut = context.BitwiseAnd(vOut, context.BitwiseNot(context.BitwiseExclusiveOr(n, m)));
+            vOut = context.ICompareLess(vOut, Const(vOut.Type, 0));
+
+            SetFlag(context, PState.VFlag, vOut);
+        }
+
+        // Updates C after a subtract with carry-in, using the two operands n and m.
+        public static void EmitSbcsCCheck(ArmEmitterContext context, Operand n, Operand m)
+        {
+            // C = (Rn == Rm && CIn) || Rn > Rm
+            Operand cIn = GetFlag(PState.CFlag);
+
+            Operand cOut = context.BitwiseAnd(context.ICompareEqual(n, m), cIn);
+            cOut = context.BitwiseOr(cOut, context.ICompareGreaterUI(n, m));
+
+            SetFlag(context, PState.CFlag, cOut);
+        }
+
+        // Updates C after a plain subtract of m from n: C = Rn >= Rm (unsigned compare).
+        public static void EmitSubsCCheck(ArmEmitterContext context, Operand n, Operand m)
+        {
+            SetFlag(context, PState.CFlag, context.ICompareGreaterOrEqualUI(n, m));
+        }
+
+        // Updates V after a subtract: set when n and m differ in sign and d's sign differs from n's.
+        public static void EmitSubsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d)
+        {
+            // V = (Rd ^ Rn) & (Rn ^ Rm) < 0
+            Operand vOut = context.BitwiseExclusiveOr(d, n);
+
+            vOut = context.BitwiseAnd(vOut, context.BitwiseExclusiveOr(n, m));
+            vOut = context.ICompareLess(vOut, Const(vOut.Type, 0));
+
+            SetFlag(context, PState.VFlag, vOut);
+        }
+
+        // Reads the first ALU operand (Rn) of the current IOpCodeAlu or IOpCode32Alu opcode.
+        public static Operand GetAluN(ArmEmitterContext context)
+        {
+            if (context.CurrOp is IOpCodeAlu op)
+            {
+                // Logical and IOpCodeAluRs forms go through GetIntOrZR, all others through GetIntOrSP.
+                bool useZR = op.DataOp == DataOp.Logical || op is IOpCodeAluRs;
+
+                return useZR ? GetIntOrZR(context, op.Rn) : GetIntOrSP(context, op.Rn);
+            }
+
+            if (context.CurrOp is IOpCode32Alu op32)
+            {
+                return GetIntA32(context, op32.Rn);
+            }
+
+            throw InvalidOpCodeType(context.CurrOp);
+        }
+
+        // Builds the second ALU operand (M) for the current opcode: an immediate, or Rm after
+        // the shift/extend the opcode encodes. ARM32 forms may also write the shifter carry-out
+        // to C; pass setCarry: false when the caller computes C itself.
+        public static Operand GetAluM(ArmEmitterContext context, bool setCarry = true)
+        {
+            switch (context.CurrOp)
+            {
+                // ARM32.
+                case OpCode32AluImm op:
+                {
+                    // setCarry is honored here too, matching the shifted-register case below.
+                    if (setCarry && op.SetFlags && op.IsRotated)
+                    {
+                        SetFlag(context, PState.CFlag, Const((uint)op.Immediate >> 31));
+                    }
+
+                    return Const(op.Immediate);
+                }
+
+                case OpCode32AluRsImm op: return GetMShiftedByImmediate(context, op, setCarry);
+
+                case OpCodeT16AluImm8 op: return Const(op.Immediate);
+
+                // ARM64.
+                case IOpCodeAluImm op:
+                {
+                    bool is32Bits = op.GetOperandType() == OperandType.I32;
+
+                    return is32Bits ? Const((int)op.Immediate) : Const(op.Immediate);
+                }
+
+                case IOpCodeAluRs op:
+                {
+                    Operand value = GetIntOrZR(context, op.Rm);
+
+                    switch (op.ShiftType)
+                    {
+                        case ShiftType.Lsl: value = context.ShiftLeft   (value, Const(op.Shift)); break;
+                        case ShiftType.Lsr: value = context.ShiftRightUI(value, Const(op.Shift)); break;
+                        case ShiftType.Asr: value = context.ShiftRightSI(value, Const(op.Shift)); break;
+                        case ShiftType.Ror: value = context.RotateRight (value, Const(op.Shift)); break;
+                    }
+
+                    return value;
+                }
+
+                case IOpCodeAluRx op:
+                {
+                    Operand value = GetExtendedM(context, op.Rm, op.IntType);
+
+                    return context.ShiftLeft(value, Const(op.Shift));
+                }
+
+                default: throw InvalidOpCodeType(context.CurrOp);
+            }
+        }
+
+        // Builds the exception thrown when the current opcode is not one of the handled types.
+        private static Exception InvalidOpCodeType(OpCode opCode)
+        {
+            return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\".");
+        }
+
+        // ARM32 helpers.
+        // Applies the immediate shift encoded in op to Rm. An immediate of 0 selects a shift of
+        // 32 for LSR and ASR, RRX for ROR, and no shift for LSL. C is written only when both
+        // setCarry and op.SetFlags are true.
+        private static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32AluRsImm op, bool setCarry)
+        {
+            Operand m = GetIntA32(context, op.Rm);
+
+            int shift = op.Imm;
+
+            if (shift == 0)
+            {
+                switch (op.ShiftType)
+                {
+                    case ShiftType.Lsr: shift = 32; break;
+                    case ShiftType.Asr: shift = 32; break;
+                    case ShiftType.Ror: shift = 1;  break;
+                }
+            }
+
+            if (shift != 0)
+            {
+                setCarry &= op.SetFlags;
+
+                switch (op.ShiftType)
+                {
+                    case ShiftType.Lsl: m = GetLslC(context, m, setCarry, shift); break;
+                    case ShiftType.Lsr: m = GetLsrC(context, m, setCarry, shift); break;
+                    case ShiftType.Asr: m = GetAsrC(context, m, setCarry, shift); break;
+                    case ShiftType.Ror:
+                        // ROR with an immediate of 0 is RRX (rotate through carry).
+                        m = op.Imm != 0 ? GetRorC(context, m, setCarry, shift) : GetRrxC(context, m, setCarry);
+                        break;
+                }
+            }
+
+            return m;
+        }
+
+        // Logical shift left by a constant; when setCarry is set, C receives the last bit shifted out.
+        private static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+        {
+            if ((uint)shift > 32)
+            {
+                return GetShiftByMoreThan32(context, setCarry);
+            }
+            else if (shift == 32)
+            {
+                if (setCarry)
+                {
+                    SetCarryMLsb(context, m);
+                }
+
+                return Const(0);
+            }
+            else
+            {
+                if (setCarry)
+                {
+                    Operand cOut = context.ShiftRightUI(m, Const(32 - shift));
+
+                    cOut = context.BitwiseAnd(cOut, Const(1));
+
+                    SetFlag(context, PState.CFlag, cOut);
+                }
+
+                return context.ShiftLeft(m, Const(shift));
+            }
+        }
+
+        // Logical shift right by a constant; when setCarry is set, C receives the last bit shifted out.
+        private static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+        {
+            if ((uint)shift > 32)
+            {
+                return GetShiftByMoreThan32(context, setCarry);
+            }
+            else if (shift == 32)
+            {
+                if (setCarry)
+                {
+                    SetCarryMMsb(context, m);
+                }
+
+                return Const(0);
+            }
+            else
+            {
+                if (setCarry)
+                {
+                    SetCarryMShrOut(context, m, shift);
+                }
+
+                return context.ShiftRightUI(m, Const(shift));
+            }
+        }
+
+        // Result of a logical shift by more than 32 bits: the value is 0 and, if requested, C is cleared.
+        private static Operand GetShiftByMoreThan32(ArmEmitterContext context, bool setCarry)
+        {
+            if (setCarry)
+            {
+                SetFlag(context, PState.CFlag, Const(0));
+            }
+
+            return Const(0);
+        }
+
+        // Arithmetic shift right by a constant; shifts of 32 or more leave only copies of the sign bit.
+        private static Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+        {
+            if ((uint)shift >= 32)
+            {
+                m = context.ShiftRightSI(m, Const(31));
+
+                if (setCarry)
+                {
+                    SetCarryMLsb(context, m);
+                }
+
+                return m;
+            }
+            else
+            {
+                if (setCarry)
+                {
+                    SetCarryMShrOut(context, m, shift);
+                }
+
+                return context.ShiftRightSI(m, Const(shift));
+            }
+        }
+
+        // Rotate right by (shift & 31); when setCarry is set, C receives bit 31 of the rotated value.
+        private static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift)
+        {
+            shift &= 0x1f;
+
+            m = context.RotateRight(m, Const(shift));
+
+            if (setCarry)
+            {
+                SetCarryMMsb(context, m);
+            }
+
+            return m;
+        }
+
+        // Rotate right by 1 with carry: the old C becomes bit 31 and, if requested, bit 0 becomes C.
+        private static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry)
+        {
+            // Copy the incoming carry first: SetCarryMLsb below overwrites the C flag.
+            Operand cIn = context.Copy(GetFlag(PState.CFlag));
+
+            if (setCarry)
+            {
+                SetCarryMLsb(context, m);
+            }
+
+            m = context.ShiftRightUI(m, Const(1));
+
+            m = context.BitwiseOr(m, context.ShiftLeft(cIn, Const(31)));
+
+            return m;
+        }
+
+        // Sets C to bit 0 of m.
+        private static void SetCarryMLsb(ArmEmitterContext context, Operand m)
+        {
+            SetFlag(context, PState.CFlag, context.BitwiseAnd(m, Const(1)));
+        }
+
+        // Sets C to m shifted right (unsigned) by 31.
+        private static void SetCarryMMsb(ArmEmitterContext context, Operand m)
+        {
+            SetFlag(context, PState.CFlag, context.ShiftRightUI(m, Const(31)));
+        }
+
+        // Sets C to bit (shift - 1) of m, the last bit a right shift by shift moves out.
+        private static void SetCarryMShrOut(ArmEmitterContext context, Operand m, int shift)
+        {
+            Operand cOut = context.ShiftRightUI(m, Const(shift - 1));
+
+            cOut = context.BitwiseAnd(cOut, Const(1));
+
+            SetFlag(context, PState.CFlag, cOut);
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitBfm.cs b/ARMeilleure/Instructions/InstEmitBfm.cs
new file mode 100644
index 000000000..8fdbf6cfd
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitBfm.cs
@@ -0,0 +1,196 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        // BFM: inserts a bitfield taken from Rn into Rd, leaving the remaining bits of Rd unchanged.
+        public static void Bfm(ArmEmitterContext context)
+        {
+            OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+            Operand d = GetIntOrZR(context, op.Rd);
+            Operand n = GetIntOrZR(context, op.Rn);
+
+            Operand res;
+
+            if (op.Pos < op.Shift)
+            {
+                // BFI.
+                int shift = op.GetBitsCount() - op.Shift;
+                int width = op.Pos + 1;
+
+                long mask = (long)(ulong.MaxValue >> (64 - width));
+
+                res = context.ShiftLeft(context.BitwiseAnd(n, Const(n.Type, mask)), Const(shift));
+
+                res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~(mask << shift))));
+            }
+            else
+            {
+                // BFXIL.
+                int shift = op.Shift;
+                int width = op.Pos - shift + 1;
+
+                long mask = (long)(ulong.MaxValue >> (64 - width));
+
+                res = context.BitwiseAnd(context.ShiftRightUI(n, Const(shift)), Const(n.Type, mask));
+
+                res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~mask)));
+            }
+
+            SetIntOrZR(context, op.Rd, res);
+        }
+
+        // SBFM: signed bitfield move; shift, insert-in-zero and sign-extend forms get dedicated sequences.
+        public static void Sbfm(ArmEmitterContext context)
+        {
+            OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+            int bitsCount = op.GetBitsCount();
+
+            if (op.Pos + 1 == bitsCount)
+            {
+                EmitSbfmShift(context);
+            }
+            else if (op.Pos < op.Shift)
+            {
+                EmitSbfiz(context);
+            }
+            else if (op.Pos == 7 && op.Shift == 0)
+            {
+                Operand n = GetIntOrZR(context, op.Rn);
+
+                SetIntOrZR(context, op.Rd, context.SignExtend8(n.Type, n));
+            }
+            else if (op.Pos == 15 && op.Shift == 0)
+            {
+                Operand n = GetIntOrZR(context, op.Rn);
+
+                SetIntOrZR(context, op.Rd, context.SignExtend16(n.Type, n));
+            }
+            else if (op.Pos == 31 && op.Shift == 0)
+            {
+                Operand n = GetIntOrZR(context, op.Rn);
+
+                SetIntOrZR(context, op.Rd, context.SignExtend32(n.Type, n));
+            }
+            else
+            {
+                Operand res = GetIntOrZR(context, op.Rn);
+
+                res = context.ShiftLeft   (res, Const(bitsCount - 1 - op.Pos));
+                res = context.ShiftRightSI(res, Const(bitsCount - 1));
+                res = context.BitwiseAnd  (res, Const(res.Type, ~op.TMask));
+
+                Operand n2 = GetBfmN(context);
+
+                SetIntOrZR(context, op.Rd, context.BitwiseOr(res, n2));
+            }
+        }
+
+        // UBFM: unsigned bitfield move; shift, insert-in-zero and zero-extend forms get dedicated sequences.
+        public static void Ubfm(ArmEmitterContext context)
+        {
+            OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+            if (op.Pos + 1 == op.GetBitsCount())
+            {
+                EmitUbfmShift(context);
+            }
+            else if (op.Pos + 1 == op.Shift)
+            {
+                // Tested before Pos < Shift, which this condition implies; after it, it is unreachable.
+                EmitBfmLsl(context);
+            }
+            else if (op.Pos < op.Shift)
+            {
+                EmitUbfiz(context);
+            }
+            else if (op.Pos == 7 && op.Shift == 0)
+            {
+                Operand n = GetIntOrZR(context, op.Rn);
+
+                SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xff)));
+            }
+            else if (op.Pos == 15 && op.Shift == 0)
+            {
+                Operand n = GetIntOrZR(context, op.Rn);
+
+                SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xffff)));
+            }
+            else
+            {
+                SetIntOrZR(context, op.Rd, GetBfmN(context));
+            }
+        }
+
+        private static void EmitSbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: true);
+        private static void EmitUbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: false);
+
+        // Bitfield insert in zero: shifts the low (Pos + 1) bits of Rn to the top of the register,
+        // then shifts them back down (arithmetically when signed) by (Shift - width).
+        private static void EmitBfiz(ArmEmitterContext context, bool signed)
+        {
+            OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+            int width = op.Pos + 1;
+
+            Operand res = GetIntOrZR(context, op.Rn);
+
+            res = context.ShiftLeft(res, Const(op.GetBitsCount() - width));
+
+            res = signed
+                ? context.ShiftRightSI(res, Const(op.Shift - width))
+                : context.ShiftRightUI(res, Const(op.Shift - width));
+
+            SetIntOrZR(context, op.Rd, res);
+        }
+
+        private static void EmitSbfmShift(ArmEmitterContext context) => EmitBfmShift(context, signed: true);
+        private static void EmitUbfmShift(ArmEmitterContext context) => EmitBfmShift(context, signed: false);
+
+        // Writes Rn shifted right by op.Shift to Rd, arithmetically when signed and logically otherwise.
+        private static void EmitBfmShift(ArmEmitterContext context, bool signed)
+        {
+            OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+            Operand res = GetIntOrZR(context, op.Rn);
+
+            res = signed
+                ? context.ShiftRightSI(res, Const(op.Shift))
+                : context.ShiftRightUI(res, Const(op.Shift));
+
+            SetIntOrZR(context, op.Rd, res);
+        }
+
+        // Writes Rn shifted left by (bit count - op.Shift) to Rd.
+        private static void EmitBfmLsl(ArmEmitterContext context)
+        {
+            OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+            Operand res = GetIntOrZR(context, op.Rn);
+
+            int shift = op.GetBitsCount() - op.Shift;
+
+            SetIntOrZR(context, op.Rd, context.ShiftLeft(res, Const(shift)));
+        }
+
+        // Returns Rn rotated right by op.Shift and masked with (WMask & TMask).
+        private static Operand GetBfmN(ArmEmitterContext context)
+        {
+            OpCodeBfm op = (OpCodeBfm)context.CurrOp;
+
+            Operand res = GetIntOrZR(context, op.Rn);
+
+            long mask = op.WMask & op.TMask;
+
+            return context.BitwiseAnd(context.RotateRight(res, Const(op.Shift)), Const(res.Type, mask));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitCcmp.cs b/ARMeilleure/Instructions/InstEmitCcmp.cs
new file mode 100644
index 000000000..b1b0a2a1c
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitCcmp.cs
@@ -0,0 +1,61 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitAluHelper;
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        public static void Ccmn(ArmEmitterContext context) => EmitCcmp(context, isNegated: true);
+        public static void Ccmp(ArmEmitterContext context) => EmitCcmp(context, isNegated: false);
+
+        // Conditional compare: on the path EmitCondBranch jumps to, NZCV comes from n + m (CCMN)
+        // or n - m (CCMP); on the fall-through path NZCV is loaded from the opcode's Nzcv field.
+        private static void EmitCcmp(ArmEmitterContext context, bool isNegated)
+        {
+            OpCodeCcmp ccmpOp = (OpCodeCcmp)context.CurrOp;
+
+            Operand compareLabel = Label();
+            Operand doneLabel    = Label();
+
+            EmitCondBranch(context, compareLabel, ccmpOp.Cond);
+
+            // Bit 0 of Nzcv is V, bit 1 is C, bit 2 is Z and bit 3 is N.
+            PState[] flagsFromBit0 = { PState.VFlag, PState.CFlag, PState.ZFlag, PState.NFlag };
+
+            for (int bit = 0; bit < flagsFromBit0.Length; bit++)
+            {
+                SetFlag(context, flagsFromBit0[bit], Const((ccmpOp.Nzcv >> bit) & 1));
+            }
+
+            context.Branch(doneLabel);
+
+            context.MarkLabel(compareLabel);
+
+            Operand lhs = GetAluN(context);
+            Operand rhs = GetAluM(context);
+
+            Operand result = isNegated ? context.Add(lhs, rhs) : context.Subtract(lhs, rhs);
+            EmitNZFlagsCheck(context, result);
+
+            if (isNegated)
+            {
+                EmitAddsCCheck(context, lhs, result);
+                EmitAddsVCheck(context, lhs, rhs, result);
+            }
+            else
+            {
+                EmitSubsCCheck(context, lhs, rhs);
+                EmitSubsVCheck(context, lhs, rhs, result);
+            }
+
+            context.MarkLabel(doneLabel);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitCsel.cs b/ARMeilleure/Instructions/InstEmitCsel.cs
new file mode 100644
index 000000000..60baf0bc2
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitCsel.cs
@@ -0,0 +1,53 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        // Operation applied to Rm before it is passed as the last ConditionalSelect operand.
+        private enum CselOperation
+        {
+            None,
+            Increment,
+            Invert,
+            Negate
+        }
+
+        public static void Csel(ArmEmitterContext context)  => EmitCsel(context, CselOperation.None);
+        public static void Csinc(ArmEmitterContext context) => EmitCsel(context, CselOperation.Increment);
+        public static void Csinv(ArmEmitterContext context) => EmitCsel(context, CselOperation.Invert);
+        public static void Csneg(ArmEmitterContext context) => EmitCsel(context, CselOperation.Negate);
+
+        // Writes to Rd the ConditionalSelect of Rn and the (optionally transformed) Rm on op.Cond.
+        private static void EmitCsel(ArmEmitterContext context, CselOperation cselOp)
+        {
+            OpCodeCsel selectOp = (OpCodeCsel)context.CurrOp;
+
+            Operand first  = GetIntOrZR(context, selectOp.Rn);
+            Operand second = GetIntOrZR(context, selectOp.Rm);
+
+            switch (cselOp)
+            {
+                case CselOperation.Increment:
+                    second = context.Add(second, Const(second.Type, 1));
+                    break;
+                case CselOperation.Invert:
+                    second = context.BitwiseNot(second);
+                    break;
+                case CselOperation.Negate:
+                    second = context.Negate(second);
+                    break;
+            }
+
+            Operand condTrue = GetCondTrue(context, selectOp.Cond);
+
+            SetIntOrZR(context, selectOp.Rd, context.ConditionalSelect(condTrue, first, second));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitDiv.cs b/ARMeilleure/Instructions/InstEmitDiv.cs
new file mode 100644
index 000000000..0c21dd1ba
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitDiv.cs
@@ -0,0 +1,67 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        public static void Sdiv(ArmEmitterContext context) => EmitDiv(context, unsigned: false);
+        public static void Udiv(ArmEmitterContext context) => EmitDiv(context, unsigned: true);
+
+        // Emits an integer divide of Rn by Rm with explicit branches for the special cases:
+        // a zero divisor writes 0 and (signed only) INT_MIN / -1 writes INT_MIN.
+        private static void EmitDiv(ArmEmitterContext context, bool unsigned)
+        {
+            OpCodeAluBinary binOp = (OpCodeAluBinary)context.CurrOp;
+
+            Operand dividend = GetIntOrZR(context, binOp.Rn);
+            Operand divisor  = GetIntOrZR(context, binOp.Rm);
+
+            // If Rm == 0, Rd = 0 (division by zero).
+            Operand isZeroDivisor = context.ICompareEqual(divisor, Const(divisor.Type, 0));
+
+            Operand zeroDivisorLabel = Label();
+            Operand doneLabel        = Label();
+
+            context.BranchIfTrue(zeroDivisorLabel, isZeroDivisor);
+
+            if (!unsigned)
+            {
+                // If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow).
+                bool is32Bits = binOp.RegisterSize == RegisterSize.Int32;
+                Operand intMin = is32Bits ? Const(int.MinValue) : Const(long.MinValue);
+                Operand minus1 = is32Bits ? Const(-1)           : Const(-1L);
+
+                Operand overflows = context.BitwiseAnd(
+                    context.ICompareEqual(dividend, intMin),
+                    context.ICompareEqual(divisor, minus1));
+
+                Operand noOverflowLabel = Label();
+
+                context.BranchIfFalse(noOverflowLabel, overflows);
+
+                SetAluDOrZR(context, intMin);
+
+                context.Branch(doneLabel);
+
+                context.MarkLabel(noOverflowLabel);
+            }
+
+            Operand quotient = unsigned ? context.DivideUI(dividend, divisor) : context.Divide(dividend, divisor);
+
+            SetAluDOrZR(context, quotient);
+
+            context.Branch(doneLabel);
+
+            context.MarkLabel(zeroDivisorLabel);
+
+            SetAluDOrZR(context, Const(binOp.GetOperandType(), 0));
+
+            context.MarkLabel(doneLabel);
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitException.cs b/ARMeilleure/Instructions/InstEmitException.cs
new file mode 100644
index 000000000..6f7b6fd51
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitException.cs
@@ -0,0 +1,55 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        // BRK: emits a call to NativeInterface.Break.
+        public static void Brk(ArmEmitterContext context) => EmitExceptionCall(context, NativeInterface.Break);
+
+        // SVC: emits a call to NativeInterface.SupervisorCall.
+        public static void Svc(ArmEmitterContext context) => EmitExceptionCall(context, NativeInterface.SupervisorCall);
+
+        // Calls func with the opcode's address and exception id. State is stored to the context
+        // before the call and loaded back from it afterwards.
+        private static void EmitExceptionCall(ArmEmitterContext context, _Void_U64_S32 func)
+        {
+            OpCodeException excOp = (OpCodeException)context.CurrOp;
+
+            context.StoreToContext();
+            context.Call(func, Const(excOp.Address), Const(excOp.Id));
+            context.LoadFromContext();
+
+            // With no following block, return Address + 4.
+            bool isLastBlock = context.CurrBlock.Next == null;
+
+            if (isLastBlock)
+            {
+                context.Return(Const(excOp.Address + 4));
+            }
+        }
+
+        // Emits a call to NativeInterface.Undefined with the opcode's address and raw encoding.
+        // State is stored to the context before the call and loaded back from it afterwards.
+        public static void Und(ArmEmitterContext context)
+        {
+            OpCode undOp = context.CurrOp;
+
+            Delegate handler = new _Void_U64_S32(NativeInterface.Undefined);
+
+            context.StoreToContext();
+            context.Call(handler, Const(undOp.Address), Const(undOp.RawOpCode));
+            context.LoadFromContext();
+
+            if (context.CurrBlock.Next == null)
+            {
+                context.Return(Const(undOp.Address + 4));
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitFlow.cs b/ARMeilleure/Instructions/InstEmitFlow.cs
new file mode 100644
index 000000000..93d36e1b9
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitFlow.cs
@@ -0,0 +1,159 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitFlowHelper;
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        // B: branches to the target label, or returns the target when the block has no branch block.
+        public static void B(ArmEmitterContext context)
+        {
+            OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp;
+
+            if (context.CurrBlock.Branch == null)
+            {
+                context.Return(Const(op.Immediate));
+
+                return;
+            }
+
+            context.Branch(context.GetLabel((ulong)op.Immediate));
+        }
+
+        // B.cond: branches when the condition encoded in the opcode holds.
+        public static void B_Cond(ArmEmitterContext context)
+        {
+            EmitBranch(context, ((OpCodeBImmCond)context.CurrOp).Cond);
+        }
+
+        // BL: writes the address of the next instruction to LR, then calls the target.
+        public static void Bl(ArmEmitterContext context)
+        {
+            OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp;
+
+            Operand lr = GetIntOrZR(context, RegisterAlias.Lr);
+
+            context.Copy(lr, Const(op.Address + 4));
+
+            EmitCall(context, (ulong)op.Immediate);
+        }
+
+        // BLR: like BL, with the target read from Rn before LR is overwritten.
+        public static void Blr(ArmEmitterContext context)
+        {
+            OpCodeBReg op = (OpCodeBReg)context.CurrOp;
+
+            Operand target = context.Copy(GetIntOrZR(context, op.Rn));
+            Operand lr     = GetIntOrZR(context, RegisterAlias.Lr);
+
+            context.Copy(lr, Const(op.Address + 4));
+
+            EmitVirtualCall(context, target);
+        }
+
+        // BR: jumps to the address held in Rn.
+        public static void Br(ArmEmitterContext context)
+        {
+            Operand target = GetIntOrZR(context, ((OpCodeBReg)context.CurrOp).Rn);
+
+            EmitVirtualJump(context, target);
+        }
+
+        public static void Cbz(ArmEmitterContext context)  => EmitCb(context, onNotZero: false);
+        public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true);
+
+        // CBZ/CBNZ: branches depending on whether Rt is zero.
+        private static void EmitCb(ArmEmitterContext context, bool onNotZero)
+        {
+            Operand rt = GetIntOrZR(context, ((OpCodeBImmCmp)context.CurrOp).Rt);
+
+            EmitBranch(context, rt, onNotZero);
+        }
+
+        // RET: returns the value of LR with CallFlag set.
+        public static void Ret(ArmEmitterContext context)
+        {
+            Operand lr = GetIntOrZR(context, RegisterAlias.Lr);
+
+            context.Return(context.BitwiseOr(lr, Const(CallFlag)));
+        }
+
+        public static void Tbz(ArmEmitterContext context)  => EmitTb(context, onNotZero: false);
+        public static void Tbnz(ArmEmitterContext context) => EmitTb(context, onNotZero: true);
+
+        // TBZ/TBNZ: isolates the tested bit of Rt and branches on the result.
+        private static void EmitTb(ArmEmitterContext context, bool onNotZero)
+        {
+            OpCodeBImmTest op = (OpCodeBImmTest)context.CurrOp;
+
+            Operand rt = GetIntOrZR(context, op.Rt);
+
+            Operand testedBit = context.BitwiseAnd(rt, Const(1L << op.Bit));
+
+            EmitBranch(context, testedBit, onNotZero);
+        }
+
+        // Branches to the immediate target when cond holds. With a branch block the
+        // target label is used; without one, the taken and not-taken addresses are
+        // returned instead.
+        private static void EmitBranch(ArmEmitterContext context, Condition cond)
+        {
+            OpCodeBImm op = (OpCodeBImm)context.CurrOp;
+
+            bool hasBranch = context.CurrBlock.Branch != null;
+
+            Operand lblTarget = hasBranch ? context.GetLabel((ulong)op.Immediate) : Label();
+
+            EmitCondBranch(context, lblTarget, cond);
+
+            if (!hasBranch || context.CurrBlock.Next == null)
+            {
+                context.Return(Const(op.Address + 4));
+            }
+
+            if (!hasBranch)
+            {
+                context.MarkLabel(lblTarget);
+
+                context.Return(Const(op.Immediate));
+            }
+        }
+
+        // As above, but taken when value is non-zero (onNotZero) or zero (!onNotZero).
+        private static void EmitBranch(ArmEmitterContext context, Operand value, bool onNotZero)
+        {
+            OpCodeBImm op = (OpCodeBImm)context.CurrOp;
+
+            bool hasBranch = context.CurrBlock.Branch != null;
+
+            Operand lblTarget = hasBranch ? context.GetLabel((ulong)op.Immediate) : Label();
+
+            if (onNotZero)
+            {
+                context.BranchIfTrue(lblTarget, value);
+            }
+            else
+            {
+                context.BranchIfFalse(lblTarget, value);
+            }
+
+            if (!hasBranch || context.CurrBlock.Next == null)
+            {
+                context.Return(Const(op.Address + 4));
+            }
+
+            if (!hasBranch)
+            {
+                context.MarkLabel(lblTarget);
+
+                context.Return(Const(op.Immediate));
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitFlow32.cs b/ARMeilleure/Instructions/InstEmitFlow32.cs
new file mode 100644
index 000000000..27addc78e
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitFlow32.cs
@@ -0,0 +1,71 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit32
+    {
+        // B: branches to the target label when the current block has a branch block;
+        // otherwise stores the state to the context and returns the target address.
+        public static void B(ArmEmitterContext context)
+        {
+            IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
+
+            if (context.CurrBlock.Branch != null)
+            {
+                context.Branch(context.GetLabel((ulong)op.Immediate));
+            }
+            else
+            {
+                context.StoreToContext();
+
+                context.Return(Const(op.Immediate));
+            }
+        }
+
+        // BL: branch with link; the T flag is left untouched.
+        public static void Bl(ArmEmitterContext context) => Blx(context, x: false);
+
+        // BLX (immediate): branch with link that also flips the T flag.
+        public static void Blx(ArmEmitterContext context) => Blx(context, x: true);
+
+        // BX: stores the state to the context, then branches to the address in Rm via EmitBxWritePc.
+        public static void Bx(ArmEmitterContext context)
+        {
+            IOpCode32BReg op = (IOpCode32BReg)context.CurrOp;
+
+            context.StoreToContext();
+
+            EmitBxWritePc(context, GetIntA32(context, op.Rm));
+        }
+
+        // Shared body of Bl and Blx: writes the return address to the LR banked for
+        // the current mode, optionally flips the T flag, then calls the target.
+        private static void Blx(ArmEmitterContext context, bool x)
+        {
+            IOpCode32BImm op = (IOpCode32BImm)context.CurrOp;
+
+            uint pc = op.GetPc();
+
+            bool isThumb = IsThumb(context.CurrOp);
+
+            // Value written to LR: the PC with bit 0 set in Thumb, PC - 4 otherwise.
+            uint returnAddress = isThumb ? pc | 1 : pc - 4;
+
+            SetIntOrSP(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(returnAddress));
+
+            // If x is true, then this is a branch with link and exchange.
+            // In this case we need to swap the mode between Arm <-> Thumb.
+            if (x)
+            {
+                SetFlag(context, PState.TFlag, Const(isThumb ? 0 : 1));
+            }
+
+            InstEmitFlowHelper.EmitCall(context, (ulong)op.Immediate);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/ARMeilleure/Instructions/InstEmitFlowHelper.cs
new file mode 100644
index 000000000..a8eb21d33
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitFlowHelper.cs
@@ -0,0 +1,192 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static class InstEmitFlowHelper
+    {
+        // OR-ed into the addresses returned by the call, virtual call/jump and RET
+        // emitters.
+        public const ulong CallFlag = 1;
+
+        // Branches to target when cond holds; Condition.Al branches unconditionally.
+        public static void EmitCondBranch(ArmEmitterContext context, Operand target, Condition cond)
+        {
+            if (cond == Condition.Al)
+            {
+                context.Branch(target);
+
+                return;
+            }
+
+            context.BranchIfTrue(target, GetCondTrue(context, cond));
+        }
+
+        // Returns an operand that is non-zero when condition holds. The result of
+        // context.TryGetComparisonResult is used when it is not null; otherwise the
+        // value is computed from the N, Z, C and V flags (assumed to hold 0 or 1).
+        // Conditions that are not listed in the switch yield the constant 1.
+        public static Operand GetCondTrue(ArmEmitterContext context, Condition condition)
+        {
+            Operand cmpResult = context.TryGetComparisonResult(condition);
+
+            if (cmpResult != null)
+            {
+                return cmpResult;
+            }
+
+            // Negates a flag by flipping bit 0.
+            Operand Not(Operand flag)
+            {
+                return context.BitwiseExclusiveOr(flag, Const(1));
+            }
+
+            switch (condition)
+            {
+                // Z == 1
+                case Condition.Eq:
+                    return GetFlag(PState.ZFlag);
+
+                // Z == 0
+                case Condition.Ne:
+                    return Not(GetFlag(PState.ZFlag));
+
+                // C == 1
+                case Condition.GeUn:
+                    return GetFlag(PState.CFlag);
+
+                // C == 0
+                case Condition.LtUn:
+                    return Not(GetFlag(PState.CFlag));
+
+                // N == 1
+                case Condition.Mi:
+                    return GetFlag(PState.NFlag);
+
+                // N == 0
+                case Condition.Pl:
+                    return Not(GetFlag(PState.NFlag));
+
+                // V == 1
+                case Condition.Vs:
+                    return GetFlag(PState.VFlag);
+
+                // V == 0
+                case Condition.Vc:
+                    return Not(GetFlag(PState.VFlag));
+
+                // C == 1 && Z == 0
+                case Condition.GtUn:
+                {
+                    Operand c    = GetFlag(PState.CFlag);
+                    Operand notZ = Not(GetFlag(PState.ZFlag));
+
+                    return context.BitwiseAnd(c, notZ);
+                }
+
+                // C == 0 || Z == 1
+                case Condition.LeUn:
+                {
+                    Operand notC = Not(GetFlag(PState.CFlag));
+                    Operand z    = GetFlag(PState.ZFlag);
+
+                    return context.BitwiseOr(notC, z);
+                }
+
+                // N == V
+                case Condition.Ge:
+                    return context.ICompareEqual(GetFlag(PState.NFlag), GetFlag(PState.VFlag));
+
+                // N != V
+                case Condition.Lt:
+                    return context.ICompareNotEqual(GetFlag(PState.NFlag), GetFlag(PState.VFlag));
+
+                // Z == 0 && N == V
+                case Condition.Gt:
+                {
+                    Operand notZ = Not(GetFlag(PState.ZFlag));
+                    Operand nEqV = context.ICompareEqual(GetFlag(PState.NFlag), GetFlag(PState.VFlag));
+
+                    return context.BitwiseAnd(notZ, nEqV);
+                }
+
+                // Z == 1 || N != V
+                case Condition.Le:
+                {
+                    Operand z    = GetFlag(PState.ZFlag);
+                    Operand nNeV = context.ICompareNotEqual(GetFlag(PState.NFlag), GetFlag(PState.VFlag));
+
+                    return context.BitwiseOr(z, nNeV);
+                }
+
+                default:
+                    return Const(1);
+            }
+        }
+
+        // Direct call: returns the target address with CallFlag set.
+        public static void EmitCall(ArmEmitterContext context, ulong immediate)
+        {
+            context.Return(Const(immediate | CallFlag));
+        }
+
+        // Indirect call to the address held in target.
+        public static void EmitVirtualCall(ArmEmitterContext context, Operand target)
+        {
+            EmitVirtualCallOrJump(context, target, isJump: false);
+        }
+
+        // Indirect jump to the address held in target.
+        public static void EmitVirtualJump(ArmEmitterContext context, Operand target)
+        {
+            EmitVirtualCallOrJump(context, target, isJump: true);
+        }
+
+        // Returns target with CallFlag set. isJump is not used: calls and jumps
+        // currently emit the same code.
+        private static void EmitVirtualCallOrJump(ArmEmitterContext context, Operand target, bool isJump)
+        {
+            Operand flag = Const(target.Type, (long)CallFlag);
+
+            context.Return(context.BitwiseOr(target, flag));
+        }
+
+        private static void EmitContinueOrReturnCheck(ArmEmitterContext context, Operand retVal)
+        {
+            // Note: retVal is the value returned by the called function, which is
+            // the address execution should continue at. When it is the address of
+            // the next instruction and the current block has a next block, we
+            // continue there; otherwise retVal itself is returned, so that we keep
+            // returning until we reach the dispatcher.
+            ulong nextAddr = GetNextOpAddress(context.CurrOp);
+
+            if (context.CurrBlock.Next == null)
+            {
+                context.Return(retVal);
+
+                return;
+            }
+
+            Operand lblContinue = Label();
+
+            context.BranchIfTrue(lblContinue, context.ICompareEqual(retVal, Const(nextAddr)));
+
+            context.Return(retVal);
+
+            context.MarkLabel(lblContinue);
+        }
+
+        // Address of the instruction that follows op.
+        private static ulong GetNextOpAddress(OpCode op)
+        {
+            ulong size = (ulong)op.OpCodeSizeInBytes;
+
+            return op.Address + size;
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitHash.cs b/ARMeilleure/Instructions/InstEmitHash.cs
new file mode 100644
index 000000000..0be8458e2
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitHash.cs
@@ -0,0 +1,64 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        // CRC32 instructions, implemented as calls to the SoftFallback.Crc32* methods.
+
+        // Byte variant.
+        public static void Crc32b(ArmEmitterContext context)
+            => EmitCrc32Call(context, new _U32_U32_U8(SoftFallback.Crc32b));
+
+        // Half-word variant.
+        public static void Crc32h(ArmEmitterContext context)
+            => EmitCrc32Call(context, new _U32_U32_U16(SoftFallback.Crc32h));
+
+        // Word variant.
+        public static void Crc32w(ArmEmitterContext context)
+            => EmitCrc32Call(context, new _U32_U32_U32(SoftFallback.Crc32w));
+
+        // Double-word variant.
+        public static void Crc32x(ArmEmitterContext context)
+            => EmitCrc32Call(context, new _U32_U32_U64(SoftFallback.Crc32x));
+
+        // CRC32C instructions, implemented as calls to the SoftFallback.Crc32c* methods.
+
+        // Byte variant.
+        public static void Crc32cb(ArmEmitterContext context)
+            => EmitCrc32Call(context, new _U32_U32_U8(SoftFallback.Crc32cb));
+
+        // Half-word variant.
+        public static void Crc32ch(ArmEmitterContext context)
+            => EmitCrc32Call(context, new _U32_U32_U16(SoftFallback.Crc32ch));
+
+        // Word variant.
+        public static void Crc32cw(ArmEmitterContext context)
+            => EmitCrc32Call(context, new _U32_U32_U32(SoftFallback.Crc32cw));
+
+        // Double-word variant.
+        public static void Crc32cx(ArmEmitterContext context)
+            => EmitCrc32Call(context, new _U32_U32_U64(SoftFallback.Crc32cx));
+
+        // Emits a call to the fallback wrapped by dlg and writes its result to Rd.
+        // The call receives the value of Rn followed by the value of Rm. All three
+        // registers go through the *OrZR helpers, so the zero register index reads
+        // as zero and a write to it is discarded.
+        private static void EmitCrc32Call(ArmEmitterContext context, Delegate dlg)
+        {
+            OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp;
+
+            Operand rnValue = GetIntOrZR(context, op.Rn);
+            Operand rmValue = GetIntOrZR(context, op.Rm);
+
+            Operand result = context.Call(dlg, rnValue, rmValue);
+
+            SetIntOrZR(context, op.Rd, result);
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitHelper.cs b/ARMeilleure/Instructions/InstEmitHelper.cs
new file mode 100644
index 000000000..02e104a4f
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitHelper.cs
@@ -0,0 +1,218 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static class InstEmitHelper
+    {
+        // True when the opcode is an OpCodeT16.
+        public static bool IsThumb(OpCode op) => op is OpCodeT16;
+
+        // Reads register rm (zero for ZR) and zero or sign extends it from the width
+        // selected by type. Types that are not listed leave the value unchanged.
+        public static Operand GetExtendedM(ArmEmitterContext context, int rm, IntType type)
+        {
+            Operand value = GetIntOrZR(context, rm);
+
+            switch (type)
+            {
+                case IntType.UInt8:  value = context.ZeroExtend8 (value.Type, value); break;
+                case IntType.UInt16: value = context.ZeroExtend16(value.Type, value); break;
+                case IntType.UInt32: value = context.ZeroExtend32(value.Type, value); break;
+
+                case IntType.Int8:  value = context.SignExtend8 (value.Type, value); break;
+                case IntType.Int16: value = context.SignExtend16(value.Type, value); break;
+                case IntType.Int32: value = context.SignExtend32(value.Type, value); break;
+            }
+
+            return value;
+        }
+
+        // Reads an AArch32 register. The PC reads as the constant op.GetPc(); other
+        // indices are first mapped to the alias used by the current mode.
+        public static Operand GetIntA32(ArmEmitterContext context, int regIndex)
+        {
+            if (regIndex == RegisterAlias.Aarch32Pc)
+            {
+                OpCode32 op = (OpCode32)context.CurrOp;
+
+                return Const((int)op.GetPc());
+            }
+
+            return GetIntOrSP(context, GetRegisterAlias(context.Mode, regIndex));
+        }
+
+        // Writes an AArch32 register. A write to the PC stores the state to the
+        // context and branches to value through EmitBxWritePc.
+        public static void SetIntA32(ArmEmitterContext context, int regIndex, Operand value)
+        {
+            if (regIndex == RegisterAlias.Aarch32Pc)
+            {
+                context.StoreToContext();
+
+                EmitBxWritePc(context, value);
+
+                return;
+            }
+
+            SetIntOrSP(context, GetRegisterAlias(context.Mode, regIndex), value);
+        }
+
+        public static int GetRegisterAlias(Aarch32Mode mode, int regIndex)
+        {
+            // Only registers >= 8 are banked,
+            // with registers in the range [8, 12] being
+            // banked for the FIQ mode, and registers
+            // 13 and 14 being banked for all modes.
+            if ((uint)regIndex < 8)
+            {
+                return regIndex;
+            }
+
+            return GetBankedRegisterAlias(mode, regIndex);
+        }
+
+        // Maps registers 8 to 14 to the alias banked for mode. Throws
+        // ArgumentOutOfRangeException for any other index, and ArgumentException
+        // for a mode that has no mapping for register 13 or 14.
+        public static int GetBankedRegisterAlias(Aarch32Mode mode, int regIndex)
+        {
+            // Registers 8 to 12 only have a separate bank for the FIQ mode.
+            int FiqOrUsr(int fiq, int usr)
+            {
+                return mode == Aarch32Mode.Fiq ? fiq : usr;
+            }
+
+            switch (regIndex)
+            {
+                case 8:  return FiqOrUsr(RegisterAlias.R8Fiq,  RegisterAlias.R8Usr);
+                case 9:  return FiqOrUsr(RegisterAlias.R9Fiq,  RegisterAlias.R9Usr);
+                case 10: return FiqOrUsr(RegisterAlias.R10Fiq, RegisterAlias.R10Usr);
+                case 11: return FiqOrUsr(RegisterAlias.R11Fiq, RegisterAlias.R11Usr);
+                case 12: return FiqOrUsr(RegisterAlias.R12Fiq, RegisterAlias.R12Usr);
+
+                case 13:
+                    switch (mode)
+                    {
+                        case Aarch32Mode.User:
+                        case Aarch32Mode.System:     return RegisterAlias.SpUsr;
+                        case Aarch32Mode.Fiq:        return RegisterAlias.SpFiq;
+                        case Aarch32Mode.Irq:        return RegisterAlias.SpIrq;
+                        case Aarch32Mode.Supervisor: return RegisterAlias.SpSvc;
+                        case Aarch32Mode.Abort:      return RegisterAlias.SpAbt;
+                        case Aarch32Mode.Hypervisor: return RegisterAlias.SpHyp;
+                        case Aarch32Mode.Undefined:  return RegisterAlias.SpUnd;
+
+                        default: throw new ArgumentException($"Invalid mode \"{mode}\".", nameof(mode));
+                    }
+
+                case 14:
+                    switch (mode)
+                    {
+                        case Aarch32Mode.User:
+                        case Aarch32Mode.Hypervisor:
+                        case Aarch32Mode.System:     return RegisterAlias.LrUsr;
+                        case Aarch32Mode.Fiq:        return RegisterAlias.LrFiq;
+                        case Aarch32Mode.Irq:        return RegisterAlias.LrIrq;
+                        case Aarch32Mode.Supervisor: return RegisterAlias.LrSvc;
+                        case Aarch32Mode.Abort:      return RegisterAlias.LrAbt;
+                        case Aarch32Mode.Undefined:  return RegisterAlias.LrUnd;
+
+                        default: throw new ArgumentException($"Invalid mode \"{mode}\".", nameof(mode));
+                    }
+
+                default: throw new ArgumentOutOfRangeException(nameof(regIndex));
+            }
+        }
+
+        // Interworking branch: bit 0 of pc selects the instruction set (1 = Thumb,
+        // 0 = Arm). The bit is written to the T flag, and the zero extended target
+        // is returned with bit 0 cleared for Thumb, or bits 1:0 cleared for Arm.
+        public static void EmitBxWritePc(ArmEmitterContext context, Operand pc)
+        {
+            Operand mode = context.BitwiseAnd(pc, Const(1));
+
+            SetFlag(context, PState.TFlag, mode);
+
+            Operand lblArmMode = Label();
+
+            // mode is zero for Arm targets, so the Arm path is taken when it is false.
+            context.BranchIfFalse(lblArmMode, mode);
+
+            // Thumb: instructions are half-word aligned, only bit 0 is cleared.
+            context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(pc, Const(~1))));
+
+            context.MarkLabel(lblArmMode);
+
+            // Arm: instructions are word aligned, bits 1:0 are cleared.
+            context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(pc, Const(~3))));
+        }
+
+        // Reads a register; RegisterConsts.ZeroIndex reads as zero of the opcode's operand type.
+        public static Operand GetIntOrZR(ArmEmitterContext context, int regIndex)
+        {
+            if (regIndex != RegisterConsts.ZeroIndex)
+            {
+                return GetIntOrSP(context, regIndex);
+            }
+
+            OperandType type = context.CurrOp.GetOperandType();
+
+            return type == OperandType.I32 ? Const(0) : Const(0L);
+        }
+
+        // Writes a register; writes to RegisterConsts.ZeroIndex are discarded.
+        public static void SetIntOrZR(ArmEmitterContext context, int regIndex, Operand value)
+        {
+            if (regIndex != RegisterConsts.ZeroIndex)
+            {
+                SetIntOrSP(context, regIndex, value);
+            }
+        }
+
+        // Reads an integer register, converted to 32 bits when the opcode's RegisterSize is Int32.
+        public static Operand GetIntOrSP(ArmEmitterContext context, int regIndex)
+        {
+            Operand value = Register(regIndex, RegisterType.Integer, OperandType.I64);
+
+            if (context.CurrOp.RegisterSize == RegisterSize.Int32)
+            {
+                value = context.ConvertI64ToI32(value);
+            }
+
+            return value;
+        }
+
+        // Writes an integer register; 32-bit values are zero extended to 64 bits first.
+        public static void SetIntOrSP(ArmEmitterContext context, int regIndex, Operand value)
+        {
+            Operand reg = Register(regIndex, RegisterType.Integer, OperandType.I64);
+
+            if (value.Type == OperandType.I32)
+            {
+                value = context.ZeroExtend32(OperandType.I64, value);
+            }
+
+            context.Copy(reg, value);
+        }
+
+        // Operand for the 128-bit vector register regIndex.
+        public static Operand GetVec(int regIndex) => Register(regIndex, RegisterType.Vector, OperandType.V128);
+
+        // Operand for the given flag, typed as a 32-bit integer.
+        public static Operand GetFlag(PState stateFlag) => Register((int)stateFlag, RegisterType.Flag, OperandType.I32);
+
+        // Writes a flag and tells the context that the flag was set.
+        public static void SetFlag(ArmEmitterContext context, PState stateFlag, Operand value)
+        {
+            context.Copy(GetFlag(stateFlag), value);
+
+            context.MarkFlagSet(stateFlag);
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitMemory.cs b/ARMeilleure/Instructions/InstEmitMemory.cs
new file mode 100644
index 000000000..1d5953fb2
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMemory.cs
@@ -0,0 +1,177 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        // ADR: writes the instruction address plus the immediate to Rd.
+        public static void Adr(ArmEmitterContext context)
+        {
+            OpCodeAdr op = (OpCodeAdr)context.CurrOp;
+
+            ulong address = op.Address + (ulong)op.Immediate;
+
+            SetIntOrZR(context, op.Rd, Const(address));
+        }
+
+        // ADRP: writes the address with its low 12 bits cleared, plus (immediate << 12), to Rd.
+        public static void Adrp(ArmEmitterContext context)
+        {
+            OpCodeAdr op = (OpCodeAdr)context.CurrOp;
+
+            ulong pageBase   = op.Address & ~0xfffUL;
+            ulong pageOffset = (ulong)op.Immediate << 12;
+
+            SetIntOrZR(context, op.Rd, Const(pageBase + pageOffset));
+        }
+
+        public static void Ldrs(ArmEmitterContext context) => EmitLdr(context, signed: true);
+        public static void Ldr(ArmEmitterContext context)  => EmitLdr(context, signed: false);
+
+        // Loads Rt: zero extended, or sign extended to 64 (Extend64) or 32 bits when signed.
+        private static void EmitLdr(ArmEmitterContext context, bool signed)
+        {
+            OpCodeMem op = (OpCodeMem)context.CurrOp;
+
+            Operand address = GetAddress(context);
+
+            if (!signed)
+            {
+                EmitLoadZx(context, address, op.Rt, op.Size);
+            }
+            else if (op.Extend64)
+            {
+                EmitLoadSx64(context, address, op.Rt, op.Size);
+            }
+            else
+            {
+                EmitLoadSx32(context, address, op.Rt, op.Size);
+            }
+
+            EmitWBackIfNeeded(context, address);
+        }
+
+        // LDR (literal): loads Rt from the address in op.Immediate; prefetch forms emit nothing.
+        public static void Ldr_Literal(ArmEmitterContext context)
+        {
+            IOpCodeLit op = (IOpCodeLit)context.CurrOp;
+
+            if (op.Prefetch)
+            {
+                return;
+            }
+
+            Operand address = Const(op.Immediate);
+
+            if (op.Signed)
+            {
+                EmitLoadSx64(context, address, op.Rt, op.Size);
+            }
+            else
+            {
+                EmitLoadZx(context, address, op.Rt, op.Size);
+            }
+        }
+
+        // LDP: loads Rt and Rt2 from two consecutive elements of (1 << Size) bytes.
+        public static void Ldp(ArmEmitterContext context)
+        {
+            OpCodeMemPair op = (OpCodeMemPair)context.CurrOp;
+
+            Operand address  = GetAddress(context);
+            Operand address2 = context.Add(address, Const(1L << op.Size));
+
+            void LoadInto(int rt, Operand from)
+            {
+                if (op.Extend64)
+                {
+                    EmitLoadSx64(context, from, rt, op.Size);
+                }
+                else
+                {
+                    EmitLoadZx(context, from, rt, op.Size);
+                }
+            }
+
+            LoadInto(op.Rt,  address);
+            LoadInto(op.Rt2, address2);
+
+            EmitWBackIfNeeded(context, address);
+        }
+
+        // STR: stores Rt to memory.
+        public static void Str(ArmEmitterContext context)
+        {
+            OpCodeMem op = (OpCodeMem)context.CurrOp;
+
+            Operand address = GetAddress(context);
+
+            InstEmitMemoryHelper.EmitStore(context, address, op.Rt, op.Size);
+
+            EmitWBackIfNeeded(context, address);
+        }
+
+        // STP: stores Rt and Rt2 to two consecutive elements of (1 << Size) bytes.
+        public static void Stp(ArmEmitterContext context)
+        {
+            OpCodeMemPair op = (OpCodeMemPair)context.CurrOp;
+
+            Operand address  = GetAddress(context);
+            Operand address2 = context.Add(address, Const(1L << op.Size));
+
+            InstEmitMemoryHelper.EmitStore(context, address,  op.Rt,  op.Size);
+            InstEmitMemoryHelper.EmitStore(context, address2, op.Rt2, op.Size);
+
+            EmitWBackIfNeeded(context, address);
+        }
+
+        // Address accessed by the current opcode; null unless it is an OpCodeMemImm or OpCodeMemReg.
+        private static Operand GetAddress(ArmEmitterContext context)
+        {
+            if (context.CurrOp is OpCodeMemImm immOp)
+            {
+                Operand rn = context.Copy(GetIntOrSP(context, immOp.Rn));
+
+                // Post-indexed accesses use the base register as is.
+                return immOp.PostIdx ? rn : context.Add(rn, Const(immOp.Immediate));
+            }
+
+            if (context.CurrOp is OpCodeMemReg regOp)
+            {
+                Operand n = GetIntOrSP(context, regOp.Rn);
+                Operand m = GetExtendedM(context, regOp.Rm, regOp.IntType);
+
+                if (regOp.Shift)
+                {
+                    m = context.ShiftLeft(m, Const(regOp.Size));
+                }
+
+                return context.Add(n, m);
+            }
+
+            return null;
+        }
+
+        // Writes the address back to Rn for an OpCodeMemImm with WBack (post-index adds the immediate).
+        private static void EmitWBackIfNeeded(ArmEmitterContext context, Operand address)
+        {
+            if (!(context.CurrOp is OpCodeMemImm op) || !op.WBack)
+            {
+                return;
+            }
+
+            if (op.PostIdx)
+            {
+                address = context.Add(address, Const(op.Immediate));
+            }
+
+            SetIntOrSP(context, op.Rn, address);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMemory32.cs b/ARMeilleure/Instructions/InstEmitMemory32.cs
new file mode 100644
index 000000000..002d2c5c6
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMemory32.cs
@@ -0,0 +1,256 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit32
+    {
+        private const int ByteSizeLog2  = 0; // log2(1): 1 byte access.
+        private const int HWordSizeLog2 = 1; // log2(2): 2 bytes (half word) access.
+        private const int WordSizeLog2  = 2; // log2(4): 4 bytes (word) access.
+        private const int DWordSizeLog2 = 3; // log2(8): 8 bytes (double word) access.
+
+        [Flags]
+        enum AccessType
+        {
+            Store  = 0,      // No flag set: the access is a store.
+            Signed = 1 << 0, // Loads use the sign extending load helper.
+            Load   = 1 << 1, // The access is a load.
+            // Combinations passed by the load instruction emitters.
+            LoadZx = Load,
+            LoadSx = Load | Signed,
+        }
+
+        public static void Ldm(ArmEmitterContext context)
+        {
+            OpCode32MemMult op = (OpCode32MemMult)context.CurrOp;
+
+            Operand rnValue      = GetIntA32(context, op.Rn);
+            Operand firstAddress = context.Add(rnValue, Const(op.Offset));
+
+            // No write back without a post offset, or when Rn is the PC and the PC is on the list.
+            bool pcInList = (op.RegisterMask & (1 << RegisterAlias.Aarch32Pc)) != 0;
+
+            if (op.PostOffset != 0 && !(op.Rn == RegisterAlias.Aarch32Pc && pcInList))
+            {
+                SetIntA32(context, op.Rn, context.Add(rnValue, Const(op.PostOffset)));
+            }
+
+            int pending = op.RegisterMask;
+            int reg     = 0;
+            int disp    = 0;
+
+            // Walk the register list from bit 0 upwards, loading one word per set bit.
+            while (pending != 0)
+            {
+                if ((pending & 1) != 0)
+                {
+                    EmitLoadZx(context, context.Add(firstAddress, Const(disp)), reg, WordSizeLog2);
+                    disp += 4;
+                }
+
+                pending >>= 1;
+                reg++;
+            }
+        }
+
+        // Word sized load, emitted with AccessType.LoadZx (load flag
+        // set, signed flag clear).
+        public static void Ldr(ArmEmitterContext context)
+            => EmitLoadOrStore(context, WordSizeLog2, AccessType.LoadZx);
+
+        // Byte sized load, emitted with AccessType.LoadZx (load flag
+        // set, signed flag clear).
+        public static void Ldrb(ArmEmitterContext context)
+            => EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx);
+
+        // Double word sized load. EmitLoadOrStore emits it as two word
+        // loads, into registers Rt and Rt | 1.
+        public static void Ldrd(ArmEmitterContext context)
+            => EmitLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx);
+
+        // Half word sized load, emitted with AccessType.LoadZx (load flag
+        // set, signed flag clear).
+        public static void Ldrh(ArmEmitterContext context)
+            => EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx);
+
+        // Byte sized load, emitted with AccessType.LoadSx (load and
+        // signed flags set).
+        public static void Ldrsb(ArmEmitterContext context)
+            => EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadSx);
+
+        // Half word sized load, emitted with AccessType.LoadSx (load and
+        // signed flags set).
+        public static void Ldrsh(ArmEmitterContext context)
+            => EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadSx);
+
+        public static void Stm(ArmEmitterContext context)
+        {
+            OpCode32MemMult op = (OpCode32MemMult)context.CurrOp;
+
+            Operand rnValue      = GetIntA32(context, op.Rn);
+            Operand firstAddress = context.Add(rnValue, Const(op.Offset));
+
+            bool hasWriteBack = op.PostOffset != 0;
+
+            int pending = op.RegisterMask;
+            int reg     = 0;
+            int disp    = 0;
+
+            while (pending != 0)
+            {
+                if ((pending & 1) != 0)
+                {
+                    EmitStore(context, context.Add(firstAddress, Const(disp)), reg, WordSizeLog2);
+
+                    // The write back is emitted right after the first store. If Rn is
+                    // the first register on the list, the unmodified value is thus the
+                    // one written to memory. If it is on the list but not first, the
+                    // value written varies between CPUs.
+                    if (disp == 0 && hasWriteBack)
+                    {
+                        SetIntA32(context, op.Rn, context.Add(rnValue, Const(op.PostOffset)));
+                    }
+
+                    disp += 4;
+                }
+
+                pending >>= 1;
+                reg++;
+            }
+        }
+
+        // Word sized store, emitted with AccessType.Store (no flags
+        // set).
+        public static void Str(ArmEmitterContext context)
+            => EmitLoadOrStore(context, WordSizeLog2, AccessType.Store);
+
+        // Byte sized store, emitted with AccessType.Store (no flags
+        // set).
+        public static void Strb(ArmEmitterContext context)
+            => EmitLoadOrStore(context, ByteSizeLog2, AccessType.Store);
+
+        // Double word sized store. EmitLoadOrStore emits it as two word
+        // stores, from registers Rt and Rt | 1.
+        public static void Strd(ArmEmitterContext context)
+            => EmitLoadOrStore(context, DWordSizeLog2, AccessType.Store);
+
+        // Half word sized store, emitted with AccessType.Store (no flags
+        // set).
+        public static void Strh(ArmEmitterContext context)
+            => EmitLoadOrStore(context, HWordSizeLog2, AccessType.Store);
+
+        private static void EmitLoadOrStore(ArmEmitterContext context, int size, AccessType accType)
+        {
+            OpCode32Mem op = (OpCode32Mem)context.CurrOp;
+            // Shared emitter for the single register A32 loads and stores; size is the log2 of the access size.
+            Operand n = context.Copy(GetIntA32(context, op.Rn));
+
+            Operand temp = null;
+            // temp is Rn +/- immediate. It is only needed as the address (Index) or for the write back (WBack).
+            if (op.Index || op.WBack)
+            {
+                temp = op.Add
+                    ? context.Add     (n, Const(op.Immediate))
+                    : context.Subtract(n, Const(op.Immediate));
+            }
+            // The base register update is emitted before the memory access itself.
+            if (op.WBack)
+            {
+                SetIntA32(context, op.Rn, temp);
+            }
+
+            Operand address;
+            // Indexed forms access the offset address, the others access the base value copied above.
+            if (op.Index)
+            {
+                address = temp;
+            }
+            else
+            {
+                address = n;
+            }
+
+            if ((accType & AccessType.Load) != 0)
+            {
+                void Load(int rt, int offs, int loadSize) // Loads register rt from address + offs.
+                {
+                    Operand addr = context.Add(address, Const(offs));
+
+                    if ((accType & AccessType.Signed) != 0)
+                    {
+                        EmitLoadSx32(context, addr, rt, loadSize);
+                    }
+                    else
+                    {
+                        EmitLoadZx(context, addr, rt, loadSize);
+                    }
+                }
+                // Double word loads are emitted as two word loads; PState.EFlag selects the register order.
+                if (size == DWordSizeLog2)
+                {
+                    Operand lblBigEndian = Label();
+                    Operand lblEnd       = Label();
+
+                    context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+                    // EFlag clear: Rt gets the word at offset 0, Rt | 1 the word at offset 4.
+                    Load(op.Rt,     0, WordSizeLog2);
+                    Load(op.Rt | 1, 4, WordSizeLog2);
+
+                    context.Branch(lblEnd);
+
+                    context.MarkLabel(lblBigEndian);
+                    // EFlag set: the two registers are swapped.
+                    Load(op.Rt | 1, 0, WordSizeLog2);
+                    Load(op.Rt,     4, WordSizeLog2);
+
+                    context.MarkLabel(lblEnd);
+                }
+                else
+                {
+                    Load(op.Rt, 0, size);
+                }
+            }
+            else
+            {
+                void Store(int rt, int offs, int storeSize) // Stores register rt at address + offs.
+                {
+                    Operand addr = context.Add(address, Const(offs));
+
+                    EmitStore(context, addr, rt, storeSize);
+                }
+                // Double word stores mirror the loads: two word stores, register order selected by PState.EFlag.
+                if (size == DWordSizeLog2)
+                {
+                    Operand lblBigEndian = Label();
+                    Operand lblEnd       = Label();
+
+                    context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));
+                    // EFlag clear: Rt is stored at offset 0, Rt | 1 at offset 4.
+                    Store(op.Rt,     0, WordSizeLog2);
+                    Store(op.Rt | 1, 4, WordSizeLog2);
+
+                    context.Branch(lblEnd);
+
+                    context.MarkLabel(lblBigEndian);
+                    // EFlag set: the two registers are swapped.
+                    Store(op.Rt | 1, 0, WordSizeLog2);
+                    Store(op.Rt,     4, WordSizeLog2);
+
+                    context.MarkLabel(lblEnd);
+                }
+                else
+                {
+                    Store(op.Rt, 0, size);
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMemoryEx.cs b/ARMeilleure/Instructions/InstEmitMemoryEx.cs
new file mode 100644
index 000000000..bcca7619d
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMemoryEx.cs
@@ -0,0 +1,261 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        [Flags]
+        private enum AccessType
+        {
+            None      = 0,
+            Ordered   = 1 << 0, // EmitBarrier is called before the access.
+            Exclusive = 1 << 1, // The *Exclusive NativeInterface methods are used.
+            OrderedEx = Ordered | Exclusive
+        }
+
+        // Emits a call to NativeInterface.ClearExclusive, which takes no
+        // arguments and returns nothing (_Void delegate).
+        public static void Clrex(ArmEmitterContext context)
+            => context.Call(new _Void(NativeInterface.ClearExclusive));
+
+        public static void Dmb(ArmEmitterContext context) => EmitBarrier(context); // Forwards to EmitBarrier, which currently emits nothing.
+        public static void Dsb(ArmEmitterContext context) => EmitBarrier(context); // Handled exactly like Dmb.
+
+        public static void Ldar(ArmEmitterContext context)  => EmitLdr(context, AccessType.Ordered); // Single register, ordered.
+        public static void Ldaxr(ArmEmitterContext context) => EmitLdr(context, AccessType.OrderedEx); // Single register, ordered and exclusive.
+        public static void Ldxr(ArmEmitterContext context)  => EmitLdr(context, AccessType.Exclusive); // Single register, exclusive.
+        public static void Ldxp(ArmEmitterContext context)  => EmitLdp(context, AccessType.Exclusive); // Register pair, exclusive.
+        public static void Ldaxp(ArmEmitterContext context) => EmitLdp(context, AccessType.OrderedEx); // Register pair, ordered and exclusive.
+
+        // Single register variant of the ordered/exclusive load emitter
+        // (EmitLoadEx with pair set to false).
+        private static void EmitLdr(ArmEmitterContext context, AccessType accType)
+            => EmitLoadEx(context, accType, pair: false);
+
+        // Register pair variant of the ordered/exclusive load emitter
+        // (EmitLoadEx with pair set to true).
+        private static void EmitLdp(ArmEmitterContext context, AccessType accType)
+            => EmitLoadEx(context, accType, pair: true);
+
+        private static void EmitLoadEx(ArmEmitterContext context, AccessType accType, bool pair)
+        {
+            OpCodeMemEx op = (OpCodeMemEx)context.CurrOp;
+
+            if ((accType & AccessType.Ordered) == AccessType.Ordered)
+            {
+                EmitBarrier(context);
+            }
+
+            bool isExclusive = (accType & AccessType.Exclusive) == AccessType.Exclusive;
+
+            Operand address = context.Copy(GetIntOrSP(context, op.Rn));
+
+            if (!pair)
+            {
+                // 8, 16, 32 or 64-bits (non-pairwise) load.
+                SetIntOrZR(context, op.Rt, EmitLoad(context, address, isExclusive, op.Size));
+
+                return;
+            }
+
+            // Exclusive loads should be atomic, so a pairwise load reads both
+            // registers with a single access: a 64-bits load for a pair of
+            // 32-bits values, and a special 128-bits atomic read for a pair
+            // of 64-bits values.
+            switch (op.Size)
+            {
+                case 2:
+                {
+                    Operand both = EmitLoad(context, address, isExclusive, 3);
+
+                    // The low word goes to Rt (zero extended), the high word to Rt2.
+                    Operand low  = context.ZeroExtend32(OperandType.I64, context.ConvertI64ToI32(both));
+                    Operand high = context.ShiftRightUI(both, Const(32));
+
+                    SetIntOrZR(context, op.Rt,  low);
+                    SetIntOrZR(context, op.Rt2, high);
+                    break;
+                }
+
+                case 3:
+                {
+                    Operand both = EmitLoad(context, address, isExclusive, 4);
+
+                    Operand low  = context.VectorExtract(OperandType.I64, both, 0);
+                    Operand high = context.VectorExtract(OperandType.I64, both, 1);
+
+                    SetIntOrZR(context, op.Rt,  low);
+                    SetIntOrZR(context, op.Rt2, high);
+                    break;
+                }
+
+                default:
+                    throw new InvalidOperationException($"Invalid load size of {1 << op.Size} bytes.");
+            }
+        }
+
+        // Emits a call to the NativeInterface read method for the given access size
+        // (0 to 4) and exclusiveness. Throws ArgumentOutOfRangeException otherwise.
+        private static Operand EmitLoad(ArmEmitterContext context, Operand address, bool exclusive, int size)
+        {
+            Delegate fallbackMethodDlg;
+
+            if (exclusive)
+            {
+                switch (size)
+                {
+                    case 0: fallbackMethodDlg = new _U8_U64  (NativeInterface.ReadByteExclusive);      break;
+                    case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16Exclusive);    break;
+                    case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32Exclusive);    break;
+                    case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64Exclusive);    break;
+                    case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128Exclusive); break;
+                    default: throw new ArgumentOutOfRangeException(nameof(size)); // No read method for this size.
+                }
+            }
+            else
+            {
+                switch (size)
+                {
+                    case 0: fallbackMethodDlg = new _U8_U64  (NativeInterface.ReadByte);      break;
+                    case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16);    break;
+                    case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32);    break;
+                    case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64);    break;
+                    case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128); break;
+                    default: throw new ArgumentOutOfRangeException(nameof(size)); // No read method for this size.
+                }
+            }
+
+            return context.Call(fallbackMethodDlg, address);
+        }
+
+        public static void Pfrm(ArmEmitterContext context)
+        {
+            // Memory prefetch: executed as a no-op, so nothing is emitted for it.
+        }
+
+        public static void Stlr(ArmEmitterContext context)  => EmitStr(context, AccessType.Ordered); // Single register, ordered.
+        public static void Stlxr(ArmEmitterContext context) => EmitStr(context, AccessType.OrderedEx); // Single register, ordered and exclusive.
+        public static void Stxr(ArmEmitterContext context)  => EmitStr(context, AccessType.Exclusive); // Single register, exclusive.
+        public static void Stxp(ArmEmitterContext context)  => EmitStp(context, AccessType.Exclusive); // Register pair, exclusive.
+        public static void Stlxp(ArmEmitterContext context) => EmitStp(context, AccessType.OrderedEx); // Register pair, ordered and exclusive.
+
+        // Single register variant of the ordered/exclusive store emitter
+        // (EmitStoreEx with pair set to false).
+        private static void EmitStr(ArmEmitterContext context, AccessType accType)
+            => EmitStoreEx(context, accType, pair: false);
+
+        // Register pair variant of the ordered/exclusive store emitter
+        // (EmitStoreEx with pair set to true).
+        private static void EmitStp(ArmEmitterContext context, AccessType accType)
+            => EmitStoreEx(context, accType, pair: true);
+
+        private static void EmitStoreEx(ArmEmitterContext context, AccessType accType, bool pair)
+        {
+            OpCodeMemEx op = (OpCodeMemEx)context.CurrOp;
+
+            if ((accType & AccessType.Ordered) == AccessType.Ordered)
+            {
+                EmitBarrier(context);
+            }
+
+            bool isExclusive = (accType & AccessType.Exclusive) == AccessType.Exclusive;
+
+            Operand address = context.Copy(GetIntOrSP(context, op.Rn));
+
+            Operand first = GetIntOrZR(context, op.Rt);
+
+            // Non-pairwise stores write Rt as is, using the opcode size.
+            Operand data     = first;
+            int     dataSize = op.Size;
+
+            if (pair)
+            {
+                Debug.Assert(op.Size == 2 || op.Size == 3, "Invalid size for pairwise store.");
+
+                Operand second = GetIntOrZR(context, op.Rt2);
+
+                // Both registers are combined into a single value, with Rt on the
+                // low half and Rt2 on the high half, stored with twice the size.
+                if (op.Size != 2)
+                {
+                    Operand lowOnly = context.VectorInsert(context.VectorZero(), first, 0);
+
+                    data = context.VectorInsert(lowOnly, second, 1);
+                }
+                else
+                {
+                    data = context.BitwiseOr(first, context.ShiftLeft(second, Const(32)));
+                }
+
+                dataSize = op.Size + 1;
+            }
+
+            Operand status = EmitStore(context, address, data, isExclusive, dataSize);
+
+            // Only exclusive stores produce a status (it is null otherwise). The
+            // value is 0 when the store is successful, and 1 otherwise.
+            if (status != null)
+            {
+                SetIntOrZR(context, op.Rs, status);
+            }
+        }
+
+        // Emits a call to the NativeInterface write method for the given access size
+        // (0 to 4). Exclusive stores return the operand with the call result, the
+        // others return null. Throws ArgumentOutOfRangeException on any other size.
+        private static Operand EmitStore(ArmEmitterContext context, Operand address, Operand value, bool exclusive, int size)
+        {
+            if (size < 3)
+            {
+                value = context.ConvertI64ToI32(value);
+            }
+
+            Delegate fallbackMethodDlg;
+
+            if (exclusive)
+            {
+                switch (size)
+                {
+                    case 0: fallbackMethodDlg = new _S32_U64_U8  (NativeInterface.WriteByteExclusive);      break;
+                    case 1: fallbackMethodDlg = new _S32_U64_U16 (NativeInterface.WriteUInt16Exclusive);    break;
+                    case 2: fallbackMethodDlg = new _S32_U64_U32 (NativeInterface.WriteUInt32Exclusive);    break;
+                    case 3: fallbackMethodDlg = new _S32_U64_U64 (NativeInterface.WriteUInt64Exclusive);    break;
+                    case 4: fallbackMethodDlg = new _S32_U64_V128(NativeInterface.WriteVector128Exclusive); break;
+                    default: throw new ArgumentOutOfRangeException(nameof(size)); // No write method for this size.
+                }
+
+                return context.Call(fallbackMethodDlg, address, value);
+            }
+            else
+            {
+                switch (size)
+                {
+                    case 0: fallbackMethodDlg = new _Void_U64_U8  (NativeInterface.WriteByte);      break;
+                    case 1: fallbackMethodDlg = new _Void_U64_U16 (NativeInterface.WriteUInt16);    break;
+                    case 2: fallbackMethodDlg = new _Void_U64_U32 (NativeInterface.WriteUInt32);    break;
+                    case 3: fallbackMethodDlg = new _Void_U64_U64 (NativeInterface.WriteUInt64);    break;
+                    case 4: fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128); break;
+                    default: throw new ArgumentOutOfRangeException(nameof(size)); // No write method for this size.
+                }
+
+                context.Call(fallbackMethodDlg, address, value);
+
+                return null;
+            }
+        }
+
+        private static void EmitBarrier(ArmEmitterContext context)
+        {
+            // Intentionally emits nothing. Note: This barrier is most likely not
+            // necessary, and probably doesn't make any difference since we need to do
+            // a ton of stuff (software MMU emulation) to read or write anything anyway.
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
new file mode 100644
index 000000000..0ae5e3f26
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs
@@ -0,0 +1,512 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static class InstEmitMemoryHelper
+    {
+        private enum Extension
+        {
+            Zx   = 0, // No sign extension is applied to the loaded value.
+            Sx32 = 1, // The loaded value is sign extended, with I32 as destination type.
+            Sx64 = 2  // The loaded value is sign extended, with I64 as destination type.
+        }
+
+        // Emits a load into register rt using Extension.Zx, that is,
+        // without any sign extension of the loaded value.
+        public static void EmitLoadZx(ArmEmitterContext context, Operand address, int rt, int size)
+            => EmitLoad(context, address, Extension.Zx, rt, size);
+
+        // Emits a load into register rt using Extension.Sx32: the loaded
+        // value is sign extended with I32 as destination type.
+        public static void EmitLoadSx32(ArmEmitterContext context, Operand address, int rt, int size)
+            => EmitLoad(context, address, Extension.Sx32, rt, size);
+
+        // Emits a load into register rt using Extension.Sx64: the loaded
+        // value is sign extended with I64 as destination type.
+        public static void EmitLoadSx64(ArmEmitterContext context, Operand address, int rt, int size)
+            => EmitLoad(context, address, Extension.Sx64, rt, size);
+
+        private static void EmitLoad(ArmEmitterContext context, Operand address, Extension ext, int rt, int size)
+        {
+            bool simd = IsSimd(context);
+
+            if ((uint)size > (simd ? 4 : 3))
+            {
+                throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            if (simd)
+            {
+                // Vector loads use a zeroed vector and element 0; no extension applies.
+                EmitReadVector(context, address, context.VectorZero(), rt, 0, size);
+
+                return;
+            }
+
+            // The value is first read into the register, and then read back
+            // from it to apply the requested sign extension (if any).
+            EmitReadInt(context, address, rt, size);
+
+            Operand loaded = GetIntOrZR(context, rt);
+
+            if (ext == Extension.Sx32 || ext == Extension.Sx64)
+            {
+                OperandType destType = ext == Extension.Sx64 ? OperandType.I64 : OperandType.I32;
+
+                switch (size)
+                {
+                    case 0: loaded = context.SignExtend8 (destType, loaded); break;
+                    case 1: loaded = context.SignExtend16(destType, loaded); break;
+                    case 2: loaded = context.SignExtend32(destType, loaded); break;
+                }
+            }
+
+            SetIntOrZR(context, rt, loaded);
+        }
+
+        // Forwards to EmitReadVector: the value read is inserted into element
+        // elem of vector, and the result is copied to vector register rt.
+        public static void EmitLoadSimd(
+            ArmEmitterContext context,
+            Operand address,
+            Operand vector,
+            int rt,
+            int elem,
+            int size)
+            => EmitReadVector(context, address, vector, rt, elem, size);
+
+        // Emits a store of register rt (vector register for SIMD opcodes) to address.
+        public static void EmitStore(ArmEmitterContext context, Operand address, int rt, int size)
+        {
+            bool simd = IsSimd(context);
+
+            if ((uint)size > (simd ? 4 : 3))
+            {
+                throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            if (!simd)
+            {
+                EmitWriteInt(context, address, rt, size);
+                return;
+            }
+
+            EmitWriteVector(context, address, rt, 0, size);
+        }
+
+        // Forwards to EmitWriteVector: element elem of vector register rt
+        // is written to address.
+        public static void EmitStoreSimd(
+            ArmEmitterContext context,
+            Operand address,
+            int rt,
+            int elem,
+            int size)
+            => EmitWriteVector(context, address, rt, elem, size);
+
+        // True for IOpCodeSimd opcodes, except for the OpCodeSimdMemMs and
+        // OpCodeSimdMemSs memory opcodes, which are excluded.
+        private static bool IsSimd(ArmEmitterContext context)
+            => context.CurrOp is IOpCodeSimd &&
+             !(context.CurrOp is OpCodeSimdMemMs) &&
+             !(context.CurrOp is OpCodeSimdMemSs);
+
+        private static void EmitReadInt(ArmEmitterContext context, Operand address, int rt, int size)
+        {
+            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);
+            // Reads 2^size bytes into rt: inline page table walk on the fast path, NativeInterface call otherwise.
+            Operand lblFastPath = Label();
+            Operand lblSlowPath = Label();
+            Operand lblEnd      = Label();
+
+            context.BranchIfFalse(lblFastPath, isUnalignedAddr);
+            // Slow path. Its label is also passed to EmitPtPointerLoad below as the fallback target.
+            context.MarkLabel(lblSlowPath);
+
+            EmitReadIntFallback(context, address, rt, size);
+
+            context.Branch(lblEnd);
+            // Fast path, taken when the address check above yields zero.
+            context.MarkLabel(lblFastPath);
+
+            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);
+
+            Operand value = null;
+
+            switch (size)
+            {
+                case 0:
+                    value = context.Load8(physAddr);
+                    break;
+
+                case 1:
+                    value = context.Load16(physAddr);
+                    break;
+
+                case 2:
+                    value = context.Load(OperandType.I32, physAddr);
+                    break;
+
+                case 3:
+                    value = context.Load(OperandType.I64, physAddr);
+                    break;
+            }
+
+            SetInt(context, rt, value);
+
+            context.MarkLabel(lblEnd);
+        }
+
+        private static void EmitReadVector(
+            ArmEmitterContext context,
+            Operand address,
+            Operand vector,
+            int rt,
+            int elem,
+            int size)
+        {
+            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);
+            // Reads 2^size bytes into vector register rt: inline page table walk on the fast path, call otherwise.
+            Operand lblFastPath = Label();
+            Operand lblSlowPath = Label();
+            Operand lblEnd      = Label();
+
+            context.BranchIfFalse(lblFastPath, isUnalignedAddr);
+            // Slow path. Its label is also passed to EmitPtPointerLoad below as the fallback target.
+            context.MarkLabel(lblSlowPath);
+
+            EmitReadVectorFallback(context, address, vector, rt, elem, size);
+
+            context.Branch(lblEnd);
+            // Fast path, taken when the address check above yields zero.
+            context.MarkLabel(lblFastPath);
+
+            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);
+
+            Operand value = null;
+            // Sizes 0 to 3 insert the loaded value into element elem of vector, size 4 loads a whole V128.
+            switch (size)
+            {
+                case 0:
+                    value = context.VectorInsert8(vector, context.Load8(physAddr), elem);
+                    break;
+
+                case 1:
+                    value = context.VectorInsert16(vector, context.Load16(physAddr), elem);
+                    break;
+
+                case 2:
+                    value = context.VectorInsert(vector, context.Load(OperandType.I32, physAddr), elem);
+                    break;
+
+                case 3:
+                    value = context.VectorInsert(vector, context.Load(OperandType.I64, physAddr), elem);
+                    break;
+
+                case 4:
+                    value = context.Load(OperandType.V128, physAddr);
+                    break;
+            }
+
+            context.Copy(GetVec(rt), value);
+
+            context.MarkLabel(lblEnd);
+        }
+
+        // Inserts value into element 0 of a zeroed vector, and returns
+        // the resulting vector operand.
+        private static Operand VectorCreate(ArmEmitterContext context, Operand value)
+            => context.VectorInsert(context.VectorZero(), value, 0);
+
+        private static void EmitWriteInt(ArmEmitterContext context, Operand address, int rt, int size)
+        {
+            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);
+            // Writes rt with a 2^size bytes access: inline page table walk on the fast path, call otherwise.
+            Operand lblFastPath = Label();
+            Operand lblSlowPath = Label();
+            Operand lblEnd      = Label();
+
+            context.BranchIfFalse(lblFastPath, isUnalignedAddr);
+            // Slow path. Its label is also passed to EmitPtPointerLoad below as the fallback target.
+            context.MarkLabel(lblSlowPath);
+
+            EmitWriteIntFallback(context, address, rt, size);
+
+            context.Branch(lblEnd);
+            // Fast path, taken when the address check above yields zero.
+            context.MarkLabel(lblFastPath);
+
+            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);
+
+            Operand value = GetInt(context, rt);
+            // Accesses smaller than 64 bits are emitted with an I32 value operand.
+            if (size < 3 && value.Type == OperandType.I64)
+            {
+                value = context.ConvertI64ToI32(value);
+            }
+
+            switch (size)
+            {
+                case 0: context.Store8 (physAddr, value); break;
+                case 1: context.Store16(physAddr, value); break;
+                case 2: context.Store  (physAddr, value); break;
+                case 3: context.Store  (physAddr, value); break;
+            }
+
+            context.MarkLabel(lblEnd);
+        }
+
+        private static void EmitWriteVector(
+            ArmEmitterContext context,
+            Operand address,
+            int rt,
+            int elem,
+            int size)
+        {
+            Operand isUnalignedAddr = EmitAddressCheck(context, address, size);
+            // Writes 2^size bytes from vector register rt: inline page table walk on the fast path, call otherwise.
+            Operand lblFastPath = Label();
+            Operand lblSlowPath = Label();
+            Operand lblEnd      = Label();
+
+            context.BranchIfFalse(lblFastPath, isUnalignedAddr);
+            // Slow path. Its label is also passed to EmitPtPointerLoad below as the fallback target.
+            context.MarkLabel(lblSlowPath);
+
+            EmitWriteVectorFallback(context, address, rt, elem, size);
+
+            context.Branch(lblEnd);
+            // Fast path, taken when the address check above yields zero.
+            context.MarkLabel(lblFastPath);
+
+            Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);
+
+            Operand value = GetVec(rt);
+            // Sizes 0 to 3 store element elem of the vector, size 4 stores the whole vector.
+            switch (size)
+            {
+                case 0:
+                    context.Store8(physAddr, context.VectorExtract8(value, elem));
+                    break;
+
+                case 1:
+                    context.Store16(physAddr, context.VectorExtract16(value, elem));
+                    break;
+
+                case 2:
+                    context.Store(physAddr, context.VectorExtract(OperandType.FP32, value, elem)); // Extracted as FP32.
+                    break;
+
+                case 3:
+                    context.Store(physAddr, context.VectorExtract(OperandType.FP64, value, elem)); // Extracted as FP64.
+                    break;
+
+                case 4:
+                    context.Store(physAddr, value);
+                    break;
+            }
+
+            context.MarkLabel(lblEnd);
+        }
+
+        // Masks address with the bits at or above AddressSpaceSize and the low size bits; callers take the slow path when the result is non-zero.
+        private static Operand EmitAddressCheck(ArmEmitterContext context, Operand address, int size)
+        {
+            long outOfRangeBits = ~(context.Memory.AddressSpaceSize - 1);
+            long misalignedBits = (1u << size) - 1;
+
+            return context.BitwiseAnd(address, Const(address.Type, outOfRangeBits | misalignedBits));
+        }
+
+        private static Operand EmitPtPointerLoad(ArmEmitterContext context, Operand address, Operand lblFallbackPath)
+        {
+            Operand pte = Const(context.Memory.PageTable.ToInt64());
+            // Emits an inline page table walk starting at the table base, one level per loop iteration.
+            int bit = MemoryManager.PageBits;
+
+            do
+            {
+                Operand addrPart = context.ShiftRightUI(address, Const(bit));
+
+                bit += context.Memory.PtLevelBits;
+                // The index is masked on all levels but the last one (the topmost address bits).
+                if (bit < context.Memory.AddressSpaceBits)
+                {
+                    addrPart = context.BitwiseAnd(addrPart, Const(addrPart.Type, context.Memory.PtLevelMask));
+                }
+
+                Operand pteOffset = context.ShiftLeft(addrPart, Const(3)); // Index * 8, entries are loaded as I64.
+
+                if (pteOffset.Type == OperandType.I32)
+                {
+                    pteOffset = context.ZeroExtend32(OperandType.I64, pteOffset);
+                }
+
+                Operand pteAddress = context.Add(pte, pteOffset);
+
+                pte = context.Load(OperandType.I64, pteAddress);
+            }
+            while (bit < context.Memory.AddressSpaceBits);
+            // Without write watch support, entries with any PteFlagsMask bit set go to the fallback path.
+            if (!context.Memory.HasWriteWatchSupport)
+            {
+                Operand hasFlagSet = context.BitwiseAnd(pte, Const((long)MemoryManager.PteFlagsMask));
+
+                context.BranchIfTrue(lblFallbackPath, hasFlagSet);
+            }
+            // The final entry is used as the page base, to which the offset inside the page is added.
+            Operand pageOffset = context.BitwiseAnd(address, Const(address.Type, MemoryManager.PageMask));
+
+            if (pageOffset.Type == OperandType.I32)
+            {
+                pageOffset = context.ZeroExtend32(OperandType.I64, pageOffset);
+            }
+
+            Operand physAddr = context.Add(pte, pageOffset);
+
+            return physAddr;
+        }
+
+        // Emits a call to the NativeInterface read method for the size, and sets rt to the call result.
+        private static void EmitReadIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
+        {
+            Delegate reader = null;
+            switch (size)
+            {
+                case 0: reader = new _U8_U64 (NativeInterface.ReadByte);   break;
+                case 1: reader = new _U16_U64(NativeInterface.ReadUInt16); break;
+                case 2: reader = new _U32_U64(NativeInterface.ReadUInt32); break;
+                case 3: reader = new _U64_U64(NativeInterface.ReadUInt64); break;
+            }
+
+            SetInt(context, rt, context.Call(reader, address));
+        }
+
+        private static void EmitReadVectorFallback(
+            ArmEmitterContext context,
+            Operand address,
+            Operand vector,
+            int rt,
+            int elem,
+            int size)
+        {
+            Delegate reader = null;
+            switch (size)
+            {
+                case 0: reader = new _U8_U64  (NativeInterface.ReadByte);      break;
+                case 1: reader = new _U16_U64 (NativeInterface.ReadUInt16);    break;
+                case 2: reader = new _U32_U64 (NativeInterface.ReadUInt32);    break;
+                case 3: reader = new _U64_U64 (NativeInterface.ReadUInt64);    break;
+                case 4: reader = new _V128_U64(NativeInterface.ReadVector128); break;
+            }
+
+            Operand result = context.Call(reader, address);
+
+            // Sizes 0 to 3 insert the value read into element elem of vector; for size 4 the result is used as is.
+            switch (size)
+            {
+                case 0: result = context.VectorInsert8 (vector, result, elem); break;
+                case 1: result = context.VectorInsert16(vector, result, elem); break;
+                case 2:
+                case 3: result = context.VectorInsert  (vector, result, elem); break;
+            }
+
+            context.Copy(GetVec(rt), result);
+        }
+
+        // Passes the NativeInterface writer for a (1 << size)-byte store of integer register rt to context.Call.
+        // An I64 register value is converted to I32 first when size < 3.
+        private static void EmitWriteIntFallback(ArmEmitterContext context, Operand address, int rt, int size)
+        {
+            Delegate fallbackMethodDlg;
+            switch (size)
+            {
+                case 0: fallbackMethodDlg = new _Void_U64_U8 (NativeInterface.WriteByte);   break;
+                case 1: fallbackMethodDlg = new _Void_U64_U16(NativeInterface.WriteUInt16); break;
+                case 2: fallbackMethodDlg = new _Void_U64_U32(NativeInterface.WriteUInt32); break;
+                case 3: fallbackMethodDlg = new _Void_U64_U64(NativeInterface.WriteUInt64); break;
+                default: throw new ArgumentOutOfRangeException(nameof(size)); // Never hand a null delegate to Call.
+            }
+
+            Operand value = GetInt(context, rt);
+            if (size < 3 && value.Type == OperandType.I64)
+            {
+                value = context.ConvertI64ToI32(value);
+            }
+            context.Call(fallbackMethodDlg, address, value);
+        }
+
+        // Passes the NativeInterface writer matching size to context.Call. Sizes 0-3 store element
+        // elem extracted from GetVec(rt); size 4 stores GetVec(rt) itself through WriteVector128.
+        private static void EmitWriteVectorFallback(
+            ArmEmitterContext context,
+            Operand address,
+            int rt,
+            int elem,
+            int size)
+        {
+            Delegate fallbackMethodDlg;
+
+            switch (size)
+            {
+                case 0: fallbackMethodDlg = new _Void_U64_U8  (NativeInterface.WriteByte);      break;
+                case 1: fallbackMethodDlg = new _Void_U64_U16 (NativeInterface.WriteUInt16);    break;
+                case 2: fallbackMethodDlg = new _Void_U64_U32 (NativeInterface.WriteUInt32);    break;
+                case 3: fallbackMethodDlg = new _Void_U64_U64 (NativeInterface.WriteUInt64);    break;
+                case 4: fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128); break;
+                default: throw new ArgumentOutOfRangeException(nameof(size)); // Never hand a null delegate to Call.
+            }
+
+            Operand value;
+
+            switch (size)
+            {
+                case 0:
+                    value = context.VectorExtract8(GetVec(rt), elem);
+                    break;
+
+                case 1:
+                    value = context.VectorExtract16(GetVec(rt), elem);
+                    break;
+
+                case 2:
+                    value = context.VectorExtract(OperandType.I32, GetVec(rt), elem);
+                    break;
+
+                case 3:
+                    value = context.VectorExtract(OperandType.I64, GetVec(rt), elem);
+                    break;
+
+                default: // Only size 4 reaches this point (validated above): pass the whole vector.
+                    value = GetVec(rt);
+                    break;
+            }
+
+            context.Call(fallbackMethodDlg, address, value);
+        }
+
+        // Reads integer register rt with GetIntA32 when the current opcode is an OpCode32,
+        // and with GetIntOrZR otherwise.
+        private static Operand GetInt(ArmEmitterContext context, int rt) =>
+            context.CurrOp is OpCode32 ? GetIntA32(context, rt) : GetIntOrZR(context, rt);
+
+        // Writes value to integer register rt: SetIntA32 for OpCode32 opcodes, SetIntOrZR otherwise.
+        private static void SetInt(ArmEmitterContext context, int rt, Operand value)
+        {
+            if (!(context.CurrOp is OpCode32))
+            {
+                SetIntOrZR(context, rt, value);
+                return;
+            }
+
+            SetIntA32(context, rt, value);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMove.cs b/ARMeilleure/Instructions/InstEmitMove.cs
new file mode 100644
index 000000000..bf051f329
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMove.cs
@@ -0,0 +1,41 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        // Movk: clears the 16-bit field starting at op.Bit in Rd, then ORs op.Immediate into the result.
+        public static void Movk(ArmEmitterContext context)
+        {
+            OpCodeMov movOp = (OpCodeMov)context.CurrOp;
+            OperandType operandType = movOp.GetOperandType();
+            long keepMask = ~(0xffffL << movOp.Bit);
+
+            Operand current = GetIntOrZR(context, movOp.Rd);
+            Operand cleared = context.BitwiseAnd(current, Const(operandType, keepMask));
+            Operand merged  = context.BitwiseOr(cleared, Const(operandType, movOp.Immediate));
+            SetIntOrZR(context, movOp.Rd, merged);
+        }
+
+        // Movn: writes the bitwise complement of op.Immediate to Rd.
+        public static void Movn(ArmEmitterContext context)
+        {
+            OpCodeMov movOp = (OpCodeMov)context.CurrOp;
+            Operand inverted = Const(movOp.GetOperandType(), ~movOp.Immediate);
+            SetIntOrZR(context, movOp.Rd, inverted);
+        }
+
+        // Movz: writes op.Immediate to Rd unchanged.
+        public static void Movz(ArmEmitterContext context)
+        {
+            OpCodeMov movOp = (OpCodeMov)context.CurrOp;
+            Operand immediate = Const(movOp.GetOperandType(), movOp.Immediate);
+            SetIntOrZR(context, movOp.Rd, immediate);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitMul.cs b/ARMeilleure/Instructions/InstEmitMul.cs
new file mode 100644
index 000000000..65d11b30d
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitMul.cs
@@ -0,0 +1,100 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        // Madd and Msub share EmitMul; isAdd picks between adding to and subtracting from Ra.
+        public static void Madd(ArmEmitterContext context) => EmitMul(context, isAdd: true);
+        public static void Msub(ArmEmitterContext context) => EmitMul(context, isAdd: false);
+
+        // Emits Rd = Ra + Rn * Rm when isAdd is set, Rd = Ra - Rn * Rm otherwise.
+        private static void EmitMul(ArmEmitterContext context, bool isAdd)
+        {
+            OpCodeMul mulOp = (OpCodeMul)context.CurrOp;
+
+            Operand addend = GetIntOrZR(context, mulOp.Ra);
+            Operand lhs    = GetIntOrZR(context, mulOp.Rn);
+            Operand rhs    = GetIntOrZR(context, mulOp.Rm);
+
+            Operand product = context.Multiply(lhs, rhs);
+            Operand result  = isAdd ? context.Add(addend, product) : context.Subtract(addend, product);
+
+            SetIntOrZR(context, mulOp.Rd, result);
+        }
+
+        // These four entry points differ only in the MullFlags value handed to EmitMull.
+        public static void Smaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedAdd);
+        public static void Smsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedSubtract);
+        public static void Umaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.Add);
+        public static void Umsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.Subtract);
+
+        [Flags]
+        private enum MullFlags
+        {
+            Subtract = 0,      // No bit set: "subtract" is the absence of Add.
+            Add      = 1 << 0, // Add the product to Ra instead of subtracting it.
+            Signed   = 1 << 1, // Use SignExtend32 rather than ZeroExtend32 on the sources.
+
+            SignedAdd      = Signed | Add,
+            SignedSubtract = Signed | Subtract
+        }
+
+        // Emits Rd = Ra +/- ext32(Rn) * ext32(Rm); flags select the extension kind and the add/subtract.
+        private static void EmitMull(ArmEmitterContext context, MullFlags flags)
+        {
+            OpCodeMul mulOp = (OpCodeMul)context.CurrOp;
+
+            bool isSigned = (flags & MullFlags.Signed) != 0;
+            bool isAdd    = (flags & MullFlags.Add)    != 0;
+
+            // Reads register index and applies the 32-bit extension selected by the Signed flag.
+            Operand Extend32(int index)
+            {
+                Operand reg = GetIntOrZR(context, index);
+
+                return isSigned
+                    ? context.SignExtend32(reg.Type, reg)
+                    : context.ZeroExtend32(reg.Type, reg);
+            }
+
+            Operand addend = GetIntOrZR(context, mulOp.Ra);
+            Operand lhs    = Extend32(mulOp.Rn);
+            Operand rhs    = Extend32(mulOp.Rm);
+
+            Operand product = context.Multiply(lhs, rhs);
+            Operand result  = isAdd ? context.Add(addend, product) : context.Subtract(addend, product);
+
+            SetIntOrZR(context, mulOp.Rd, result);
+        }
+
+        // Smulh: writes the result of the Multiply64HighSI IR operation on Rn and Rm
+        // to Rd.
+        public static void Smulh(ArmEmitterContext context)
+        {
+            OpCodeMul mulOp = (OpCodeMul)context.CurrOp;
+
+            Operand lhs = GetIntOrZR(context, mulOp.Rn);
+            Operand rhs = GetIntOrZR(context, mulOp.Rm);
+
+            SetIntOrZR(context, mulOp.Rd, context.Multiply64HighSI(lhs, rhs));
+        }
+
+        // Umulh: writes the result of the Multiply64HighUI IR operation on Rn and Rm
+        // to Rd.
+        public static void Umulh(ArmEmitterContext context)
+        {
+            OpCodeMul mulOp = (OpCodeMul)context.CurrOp;
+
+            Operand lhs = GetIntOrZR(context, mulOp.Rn);
+            Operand rhs = GetIntOrZR(context, mulOp.Rm);
+
+            SetIntOrZR(context, mulOp.Rd, context.Multiply64HighUI(lhs, rhs));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
new file mode 100644
index 000000000..44659e805
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -0,0 +1,3159 @@
+// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+// https://www.agner.org/optimize/#vectorclass @ vectori128.h
+
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    using Func2I = Func<Operand, Operand, Operand>;
+
+    static partial class InstEmit
+    {
+        // Abs_S (scalar form): EmitAbs is supplied as the element operation and
+        // EmitScalarUnaryOpSx drives it.
+        public static void Abs_S(ArmEmitterContext context) =>
+            EmitScalarUnaryOpSx(context, elem => EmitAbs(context, elem));
+
+        // Abs_V (vector form): EmitAbs is supplied as the element operation and
+        // EmitVectorUnaryOpSx drives it.
+        public static void Abs_V(ArmEmitterContext context) =>
+            EmitVectorUnaryOpSx(context, elem => EmitAbs(context, elem));
+
+        // Add_S (scalar form): the element operation is an IR Add, driven by
+        // EmitScalarBinaryOpZx.
+        public static void Add_S(ArmEmitterContext context) =>
+            EmitScalarBinaryOpZx(context, (lhs, rhs) => context.Add(lhs, rhs));
+
+        // Add_V: without SSE2 this is an element-wise IR Add through EmitVectorBinaryOpZx. With SSE2 it uses the
+        // X86PaddInstruction entry for op.Size and applies VectorZeroUpper64 when the register size is Simd64.
+        public static void Add_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpZx(context, (lhs, rhs) => context.Add(lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdReg simdOp = (OpCodeSimdReg)context.CurrOp;
+
+            Operand lhsVec = GetVec(simdOp.Rn);
+            Operand rhsVec = GetVec(simdOp.Rm);
+
+            Operand sum = context.AddIntrinsic(X86PaddInstruction[simdOp.Size], lhsVec, rhsVec);
+
+            if (simdOp.RegisterSize == RegisterSize.Simd64)
+            {
+                sum = context.VectorZeroUpper64(sum);
+            }
+
+            context.Copy(GetVec(simdOp.Rd), sum);
+        }
+
+        // Addhn_V: EmitHighNarrow with an IR Add as the element operation and
+        // rounding disabled (round: false).
+        public static void Addhn_V(ArmEmitterContext context) =>
+            EmitHighNarrow(context, (lhs, rhs) => context.Add(lhs, rhs), round: false);
+
+        // Addp_S: adds elements 0 and 1 of Rn and stores the sum as element 0 of an otherwise zeroed Rd.
+        public static void Addp_S(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            Operand sum = context.Add(
+                EmitVectorExtractZx(context, simdOp.Rn, 0, simdOp.Size),
+                EmitVectorExtractZx(context, simdOp.Rn, 1, simdOp.Size));
+            Operand packed = EmitVectorInsert(context, context.VectorZero(), sum, 0, simdOp.Size);
+            context.Copy(GetVec(simdOp.Rd), packed);
+        }
+
+        // Addp_V: the element operation is an IR Add, driven by
+        // EmitVectorPairwiseOpZx.
+        public static void Addp_V(ArmEmitterContext context) =>
+            EmitVectorPairwiseOpZx(context, (lhs, rhs) => context.Add(lhs, rhs));
+
+        // Addv_V: the element operation is an IR Add, driven by
+        // EmitVectorAcrossVectorOpZx.
+        public static void Addv_V(ArmEmitterContext context) =>
+            EmitVectorAcrossVectorOpZx(context, (lhs, rhs) => context.Add(lhs, rhs));
+
+        // Cls_V: for each element of Rn, calls SoftFallback.CountLeadingSigns with the element and its
+        // width in bits, inserting each result into a zeroed vector that is finally copied to Rd.
+        public static void Cls_V(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int elemBits  = 8 << simdOp.Size;
+            int elemCount = simdOp.GetBytesCount() >> simdOp.Size;
+
+            for (int i = 0; i < elemCount; i++)
+            {
+                Operand src = EmitVectorExtractZx(context, simdOp.Rn, i, simdOp.Size);
+                Operand cnt = context.Call(new _U64_U64_S32(SoftFallback.CountLeadingSigns), src, Const(elemBits));
+
+                result = EmitVectorInsert(context, result, cnt, i, simdOp.Size);
+            }
+
+            context.Copy(GetVec(simdOp.Rd), result);
+        }
+
+        // Clz_V: counts leading zeros per element of Rn. 64-bit elements use the IR CountLeadingZeros
+        // operation; narrower ones call SoftFallback.CountLeadingZeros with the element width in bits.
+        public static void Clz_V(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int elemBits  = 8 << simdOp.Size;
+            int elemCount = simdOp.GetBytesCount() >> simdOp.Size;
+
+            for (int i = 0; i < elemCount; i++)
+            {
+                Operand src = EmitVectorExtractZx(context, simdOp.Rn, i, simdOp.Size);
+
+                Operand cnt;
+                if (elemBits != 64)
+                {
+                    cnt = context.Call(new _U64_U64_S32(SoftFallback.CountLeadingZeros), src, Const(elemBits));
+                }
+                else
+                {
+                    cnt = context.CountLeadingZeros(src);
+                }
+
+                result = EmitVectorInsert(context, result, cnt, i, simdOp.Size);
+            }
+
+            context.Copy(GetVec(simdOp.Rd), result);
+        }
+
+        // Cnt_V: counts set bits in each byte of Rn (16 bytes for Simd128, 8 otherwise), using the
+        // X86Popcnt intrinsic when Optimizations.UsePopCnt is set and SoftFallback.CountSetBits8 if not.
+        public static void Cnt_V(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int byteCount = simdOp.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
+
+            for (int i = 0; i < byteCount; i++)
+            {
+                Operand src = EmitVectorExtractZx(context, simdOp.Rn, i, 0);
+                Operand bits;
+                if (!Optimizations.UsePopCnt)
+                {
+                    bits = context.Call(new _U64_U64(SoftFallback.CountSetBits8), src);
+                }
+                else
+                {
+                    bits = context.AddIntrinsicLong(Intrinsic.X86Popcnt, src);
+                }
+
+                result = EmitVectorInsert(context, result, bits, i, 0);
+            }
+
+            context.Copy(GetVec(simdOp.Rd), result);
+        }
+
+        // Fabd_S: absolute difference of the scalar operands. With FastFP and SSE2 it subtracts with
+        // X86Subss/X86Subsd and and-nots the result with a -0 mask (sign bit only); otherwise it
+        // chains the SoftFloat FPSub call with MathF.Abs/Math.Abs.
+        public static void Fabd_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitScalarBinaryOpF(context, (lhs, rhs) =>
+                {
+                    Operand diff = EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, lhs, rhs);
+
+                    return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, diff);
+                });
+
+                return;
+            }
+
+            OpCodeSimdReg simdOp = (OpCodeSimdReg)context.CurrOp;
+            bool isDouble = (simdOp.Size & 1) != 0;
+
+            if (isDouble)
+            {
+                Operand diff = context.AddIntrinsic(Intrinsic.X86Subsd, GetVec(simdOp.Rn), GetVec(simdOp.Rm));
+                Operand signMask = X86GetScalar(context, -0d);
+
+                diff = context.AddIntrinsic(Intrinsic.X86Andnpd, signMask, diff);
+
+                context.Copy(GetVec(simdOp.Rd), context.VectorZeroUpper64(diff));
+            }
+            else
+            {
+                Operand diff = context.AddIntrinsic(Intrinsic.X86Subss, GetVec(simdOp.Rn), GetVec(simdOp.Rm));
+                Operand signMask = X86GetScalar(context, -0f);
+
+                diff = context.AddIntrinsic(Intrinsic.X86Andnps, signMask, diff);
+
+                context.Copy(GetVec(simdOp.Rd), context.VectorZeroUpper96(diff));
+            }
+        }
+
+        // Fabd_V: element-wise absolute difference. With FastFP and SSE2 it subtracts with X86Subps/X86Subpd and
+        // and-nots the result with an all-elements -0 mask; otherwise SoftFloat FPSub then MathF.Abs/Math.Abs.
+        public static void Fabd_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpF(context, (lhs, rhs) =>
+                {
+                    Operand diff = EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, lhs, rhs);
+
+                    return EmitUnaryMathCall(context, MathF.Abs, Math.Abs, diff);
+                });
+
+                return;
+            }
+
+            OpCodeSimdReg simdOp = (OpCodeSimdReg)context.CurrOp;
+
+            Operand lhsVec = GetVec(simdOp.Rn);
+            Operand rhsVec = GetVec(simdOp.Rm);
+            Operand absDiff;
+
+            if ((simdOp.Size & 1) != 0)
+            {
+                Operand diff = context.AddIntrinsic(Intrinsic.X86Subpd, lhsVec, rhsVec);
+                Operand signMask = X86GetAllElements(context, -0d);
+
+                absDiff = context.AddIntrinsic(Intrinsic.X86Andnpd, signMask, diff);
+            }
+            else
+            {
+                Operand diff = context.AddIntrinsic(Intrinsic.X86Subps, lhsVec, rhsVec);
+                Operand signMask = X86GetAllElements(context, -0f);
+
+                absDiff = context.AddIntrinsic(Intrinsic.X86Andnps, signMask, diff);
+
+                if (simdOp.RegisterSize == RegisterSize.Simd64)
+                {
+                    absDiff = context.VectorZeroUpper64(absDiff);
+                }
+            }
+
+            context.Copy(GetVec(simdOp.Rd), absDiff);
+        }
+
+        // Fabs_S: with SSE2, and-nots Rn with a scalar -0 mask (X86Andnps/X86Andnpd) and applies
+        // VectorZeroUpper96/VectorZeroUpper64; otherwise calls MathF.Abs/Math.Abs via EmitUnaryMathCall.
+        public static void Fabs_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitScalarUnaryOpF(context, src =>
+                    EmitUnaryMathCall(context, MathF.Abs, Math.Abs, src));
+
+                return;
+            }
+
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            if (simdOp.Size != 0)
+            {
+                Operand signMask = X86GetScalar(context, -0d);
+
+                Operand abs = context.AddIntrinsic(Intrinsic.X86Andnpd, signMask, GetVec(simdOp.Rn));
+
+                context.Copy(GetVec(simdOp.Rd), context.VectorZeroUpper64(abs));
+            }
+            else
+            {
+                Operand signMask = X86GetScalar(context, -0f);
+
+                Operand abs = context.AddIntrinsic(Intrinsic.X86Andnps, signMask, GetVec(simdOp.Rn));
+
+                context.Copy(GetVec(simdOp.Rd), context.VectorZeroUpper96(abs));
+            }
+        }
+
+        // Fabs_V: with SSE2, and-nots Rn with an all-elements -0 mask (X86Andnps/X86Andnpd); the
+        // single-precision path also applies VectorZeroUpper64 for Simd64 ops. Without SSE2 it
+        // calls MathF.Abs/Math.Abs per element via EmitUnaryMathCall.
+        public static void Fabs_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorUnaryOpF(context, src =>
+                    EmitUnaryMathCall(context, MathF.Abs, Math.Abs, src));
+
+                return;
+            }
+
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            bool isDouble = (simdOp.Size & 1) != 0;
+            Operand abs;
+
+            if (isDouble)
+            {
+                Operand signMask = X86GetAllElements(context, -0d);
+
+                abs = context.AddIntrinsic(Intrinsic.X86Andnpd, signMask, GetVec(simdOp.Rn));
+            }
+            else
+            {
+                Operand signMask = X86GetAllElements(context, -0f);
+
+                abs = context.AddIntrinsic(Intrinsic.X86Andnps, signMask, GetVec(simdOp.Rn));
+
+                if (simdOp.RegisterSize == RegisterSize.Simd64)
+                {
+                    abs = context.VectorZeroUpper64(abs);
+                }
+            }
+
+            context.Copy(GetVec(simdOp.Rd), abs);
+        }
+
+        // Fadd_S: three tiers - X86Addss/X86Addsd intrinsics (FastFP and SSE2), an IR Add
+        // (FastFP only), or the SoftFloat FPAdd routines (FastFP off).
+        public static void Fadd_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP)
+            {
+                EmitScalarBinaryOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, lhs, rhs));
+            }
+            else if (Optimizations.UseSse2)
+            {
+                EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+            }
+            else
+            {
+                EmitScalarBinaryOpF(context, (lhs, rhs) => context.Add(lhs, rhs));
+            }
+        }
+
+        // Fadd_V: three tiers - X86Addps/X86Addpd intrinsics (FastFP and SSE2), an IR Add
+        // (FastFP only), or the SoftFloat FPAdd routines (FastFP off).
+        public static void Fadd_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP)
+            {
+                EmitVectorBinaryOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, lhs, rhs));
+            }
+            else if (Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+            }
+            else
+            {
+                EmitVectorBinaryOpF(context, (lhs, rhs) => context.Add(lhs, rhs));
+            }
+        }
+
+        // Faddp_S: adds elements 0 and 1 of Rn into element 0 of Rd. With FastFP and SSE3 it uses
+        // X86Haddps/X86Haddpd on Rn with itself and zeroes the upper part; otherwise it extracts both
+        // elements, adds them with the SoftFloat FPAdd routines and inserts the sum into a zero vector.
+        public static void Faddp_S(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            bool isDouble = (simdOp.Size & 1) != 0;
+
+            if (!Optimizations.FastFP || !Optimizations.UseSse3)
+            {
+                OperandType elemType = isDouble ? OperandType.FP64 : OperandType.FP32;
+
+                Operand first  = context.VectorExtract(elemType, GetVec(simdOp.Rn), 0);
+                Operand second = context.VectorExtract(elemType, GetVec(simdOp.Rn), 1);
+
+                Operand sum = EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, first, second);
+                Operand packed = context.VectorInsert(context.VectorZero(), sum, 0);
+
+                context.Copy(GetVec(simdOp.Rd), packed);
+            }
+            else if (isDouble)
+            {
+                Operand sum = context.AddIntrinsic(Intrinsic.X86Haddpd, GetVec(simdOp.Rn), GetVec(simdOp.Rn));
+
+                context.Copy(GetVec(simdOp.Rd), context.VectorZeroUpper64(sum));
+            }
+            else
+            {
+                Operand sum = context.AddIntrinsic(Intrinsic.X86Haddps, GetVec(simdOp.Rn), GetVec(simdOp.Rn));
+
+                context.Copy(GetVec(simdOp.Rd), context.VectorZeroUpper96(sum));
+            }
+        }
+
+        // Faddp_V: pairwise add using the X86Addps/X86Addpd intrinsics when FastFP and SSE2 are
+        // both enabled, and the SoftFloat FPAdd routines otherwise.
+        public static void Faddp_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitVectorPairwiseOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, lhs, rhs));
+
+                return;
+            }
+
+            EmitVectorPairwiseOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+        }
+
+        // Fdiv_S: three tiers - X86Divss/X86Divsd intrinsics (FastFP and SSE2), an IR Divide
+        // (FastFP only), or the SoftFloat FPDiv routines (FastFP off).
+        public static void Fdiv_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP)
+            {
+                EmitScalarBinaryOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPDiv, SoftFloat64.FPDiv, lhs, rhs));
+            }
+            else if (Optimizations.UseSse2)
+            {
+                EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
+            }
+            else
+            {
+                EmitScalarBinaryOpF(context, (lhs, rhs) => context.Divide(lhs, rhs));
+            }
+        }
+
+        // Fdiv_V: three tiers - X86Divps/X86Divpd intrinsics (FastFP and SSE2), an IR Divide
+        // (FastFP only), or the SoftFloat FPDiv routines (FastFP off).
+        public static void Fdiv_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP)
+            {
+                EmitVectorBinaryOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPDiv, SoftFloat64.FPDiv, lhs, rhs));
+            }
+            else if (Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd);
+            }
+            else
+            {
+                EmitVectorBinaryOpF(context, (lhs, rhs) => context.Divide(lhs, rhs));
+            }
+        }
+
+        // Fmadd_S: Rd = Ra + Rn * Rm. The FastFP + SSE2 path emits a separate multiply and add
+        // (X86Mulss/X86Addss or X86Mulsd/X86Addsd); otherwise the SoftFloat FPMulAdd routines are used.
+        public static void Fmadd_S(ArmEmitterContext context) // Fused.
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitScalarTernaryRaOpF(context, (arg1, arg2, arg3) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, arg1, arg2, arg3));
+
+                return;
+            }
+
+            OpCodeSimdReg simdOp = (OpCodeSimdReg)context.CurrOp;
+
+            Operand addendVec = GetVec(simdOp.Ra);
+            Operand lhsVec    = GetVec(simdOp.Rn);
+            Operand rhsVec    = GetVec(simdOp.Rm);
+
+            if (simdOp.Size != 0)
+            {
+                Operand product = context.AddIntrinsic(Intrinsic.X86Mulsd, lhsVec, rhsVec);
+
+                Operand sum = context.AddIntrinsic(Intrinsic.X86Addsd, addendVec, product);
+
+                context.Copy(GetVec(simdOp.Rd), context.VectorZeroUpper64(sum));
+            }
+            else
+            {
+                Operand product = context.AddIntrinsic(Intrinsic.X86Mulss, lhsVec, rhsVec);
+
+                Operand sum = context.AddIntrinsic(Intrinsic.X86Addss, addendVec, product);
+
+                context.Copy(GetVec(simdOp.Rd), context.VectorZeroUpper96(sum));
+            }
+        }
+
+        // Fmax_S: uses the X86Maxss/X86Maxsd intrinsics when FastFP and SSE2 are both enabled,
+        // and the SoftFloat FPMax routines otherwise.
+        public static void Fmax_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitScalarBinaryOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, lhs, rhs));
+
+                return;
+            }
+
+            EmitScalarBinaryOpF(context, Intrinsic.X86Maxss, Intrinsic.X86Maxsd);
+        }
+
+        // Fmax_V: uses the X86Maxps/X86Maxpd intrinsics when FastFP and SSE2 are both enabled,
+        // and the SoftFloat FPMax routines otherwise.
+        public static void Fmax_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, lhs, rhs));
+
+                return;
+            }
+
+            EmitVectorBinaryOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
+        }
+
+        // Fmaxnm_S: always routed through the SoftFloat FPMaxNum routines; this method has
+        // no intrinsic fast path.
+        public static void Fmaxnm_S(ArmEmitterContext context)
+        {
+            EmitScalarBinaryOpF(context, (lhs, rhs) =>
+                EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, lhs, rhs));
+        }
+
+        // Fmaxnm_V: always routed through the SoftFloat FPMaxNum routines; this method has
+        // no intrinsic fast path.
+        public static void Fmaxnm_V(ArmEmitterContext context)
+        {
+            EmitVectorBinaryOpF(context, (lhs, rhs) =>
+                EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, lhs, rhs));
+        }
+
+        // Fmaxp_V: pairwise max using the X86Maxps/X86Maxpd intrinsics when FastFP and SSE2 are
+        // both enabled, and the SoftFloat FPMax routines otherwise.
+        public static void Fmaxp_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitVectorPairwiseOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMax, SoftFloat64.FPMax, lhs, rhs));
+
+                return;
+            }
+
+            EmitVectorPairwiseOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
+        }
+
+        // Fmin_S: uses the X86Minss/X86Minsd intrinsics when FastFP and SSE2 are both enabled,
+        // and the SoftFloat FPMin routines otherwise.
+        public static void Fmin_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitScalarBinaryOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, lhs, rhs));
+
+                return;
+            }
+
+            EmitScalarBinaryOpF(context, Intrinsic.X86Minss, Intrinsic.X86Minsd);
+        }
+
+        // Fmin_V: uses the X86Minps/X86Minpd intrinsics when FastFP and SSE2 are both enabled,
+        // and the SoftFloat FPMin routines otherwise.
+        public static void Fmin_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, lhs, rhs));
+
+                return;
+            }
+
+            EmitVectorBinaryOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
+        }
+
+        // Fminnm_S: always routed through the SoftFloat FPMinNum routines; this method has
+        // no intrinsic fast path.
+        public static void Fminnm_S(ArmEmitterContext context)
+        {
+            EmitScalarBinaryOpF(context, (lhs, rhs) =>
+                EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, lhs, rhs));
+        }
+
+        // Fminnm_V: always routed through the SoftFloat FPMinNum routines; this method has
+        // no intrinsic fast path.
+        public static void Fminnm_V(ArmEmitterContext context)
+        {
+            EmitVectorBinaryOpF(context, (lhs, rhs) =>
+                EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, lhs, rhs));
+        }
+
+        // Fminp_V: pairwise min using the X86Minps/X86Minpd intrinsics when FastFP and SSE2 are
+        // both enabled, and the SoftFloat FPMin routines otherwise.
+        public static void Fminp_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitVectorPairwiseOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMin, SoftFloat64.FPMin, lhs, rhs));
+
+                return;
+            }
+
+            EmitVectorPairwiseOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
+        }
+
+        // Fmla_Se: the element operation is an IR Multiply of the second and third arguments
+        // followed by an IR Add with the first, driven by EmitScalarTernaryOpByElemF.
+        public static void Fmla_Se(ArmEmitterContext context) // Fused.
+        {
+            EmitScalarTernaryOpByElemF(context, (arg1, arg2, arg3) =>
+                context.Add(arg1, context.Multiply(arg2, arg3)));
+        }
+
+        // Fmla_V: Rd = Rd + Rn * Rm per element. The FastFP + SSE2 path emits a separate multiply and
+        // add (X86Mulps/X86Addps or X86Mulpd/X86Addpd), applying VectorZeroUpper64 for single-precision
+        // Simd64 ops; otherwise the SoftFloat FPMulAdd routines are used.
+        public static void Fmla_V(ArmEmitterContext context) // Fused.
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitVectorTernaryOpF(context, (arg1, arg2, arg3) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, arg1, arg2, arg3));
+
+                return;
+            }
+
+            OpCodeSimdReg simdOp = (OpCodeSimdReg)context.CurrOp;
+
+            Operand accVec = GetVec(simdOp.Rd);
+            Operand lhsVec = GetVec(simdOp.Rn);
+            Operand rhsVec = GetVec(simdOp.Rm);
+
+            bool isDouble = (simdOp.Size & 1) != 0;
+            Operand result;
+
+            if (isDouble)
+            {
+                Operand product = context.AddIntrinsic(Intrinsic.X86Mulpd, lhsVec, rhsVec);
+
+                result = context.AddIntrinsic(Intrinsic.X86Addpd, accVec, product);
+            }
+            else
+            {
+                Operand product = context.AddIntrinsic(Intrinsic.X86Mulps, lhsVec, rhsVec);
+
+                result = context.AddIntrinsic(Intrinsic.X86Addps, accVec, product);
+
+                if (simdOp.RegisterSize == RegisterSize.Simd64)
+                {
+                    result = context.VectorZeroUpper64(result);
+                }
+            }
+
+            context.Copy(GetVec(simdOp.Rd), result);
+        }
+
+        // Vector multiply-accumulate by element: Rd = Rd + (Rn * Rm[Index]).
+        // FastFP + SSE2: Rm is shuffled with an immediate built from op.Index, then the
+        // packed multiply and packed add intrinsics are emitted. For the 32-bit element form
+        // with a 64-bit register size the upper 64 bits of the result are cleared.
+        // Otherwise: per-element SoftFloat FPMulAdd calls.
+        public static void Fmla_Ve(ArmEmitterContext context) // Fused.
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitVectorTernaryOpByElemF(context, (addend, lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, addend, lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+            Operand vd = GetVec(op.Rd);
+            Operand vn = GetVec(op.Rn);
+            Operand vm = GetVec(op.Rm);
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            int index = op.Index;
+
+            // The element index is replicated into every selector field of the shuffle immediate.
+            int broadcastMask = isSingle
+                ? index | index << 2 | index << 4 | index << 6
+                : index | index << 1;
+
+            Intrinsic shuffle    = isSingle ? Intrinsic.X86Shufps : Intrinsic.X86Shufpd;
+            Intrinsic multiply   = isSingle ? Intrinsic.X86Mulps  : Intrinsic.X86Mulpd;
+            Intrinsic accumulate = isSingle ? Intrinsic.X86Addps  : Intrinsic.X86Addpd;
+
+            Operand result = context.AddIntrinsic(shuffle, vm, vm, Const(broadcastMask));
+
+            result = context.AddIntrinsic(multiply, vn, result);
+            result = context.AddIntrinsic(accumulate, vd, result);
+
+            if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar multiply-subtract by element: result = op1 - (op2 * op3), where the three
+        // operands are supplied by EmitScalarTernaryOpByElemF.
+        // FastFP enabled: emits the host IR multiply followed by the host IR subtract.
+        // FastFP disabled: calls the SoftFloat FPMulSub routine, the same fallback that
+        // Fmls_V and Fmls_Ve use, so the scalar-by-element form honors the setting as well.
+        public static void Fmls_Se(ArmEmitterContext context) // Fused.
+        {
+            if (Optimizations.FastFP)
+            {
+                EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+                {
+                    return context.Subtract(op1, context.Multiply(op2, op3));
+                });
+            }
+            else
+            {
+                EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
+                {
+                    return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3);
+                });
+            }
+        }
+
+        // Vector multiply-subtract: Rd = Rd - (Rn * Rm).
+        // FastFP + SSE2: emits X86Mulps/X86Subps (or X86Mulpd/X86Subpd when bit 0 of the size
+        // field is set). For the 32-bit element form with a 64-bit register size the upper
+        // 64 bits of the result are cleared.
+        // Otherwise: per-element SoftFloat FPMulSub calls.
+        public static void Fmls_V(ArmEmitterContext context) // Fused.
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitVectorTernaryOpF(context, (minuend, lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, minuend, lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand vd = GetVec(op.Rd);
+            Operand vn = GetVec(op.Rn);
+            Operand vm = GetVec(op.Rm);
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            Intrinsic multiply = isSingle ? Intrinsic.X86Mulps : Intrinsic.X86Mulpd;
+            Intrinsic subtract = isSingle ? Intrinsic.X86Subps : Intrinsic.X86Subpd;
+
+            Operand result = context.AddIntrinsic(multiply, vn, vm);
+
+            result = context.AddIntrinsic(subtract, vd, result);
+
+            if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Vector multiply-subtract by element: Rd = Rd - (Rn * Rm[Index]).
+        // FastFP + SSE2: Rm is shuffled with an immediate built from op.Index, then the
+        // packed multiply and packed subtract intrinsics are emitted. For the 32-bit element
+        // form with a 64-bit register size the upper 64 bits of the result are cleared.
+        // Otherwise: per-element SoftFloat FPMulSub calls.
+        public static void Fmls_Ve(ArmEmitterContext context) // Fused.
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitVectorTernaryOpByElemF(context, (minuend, lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, minuend, lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+            Operand vd = GetVec(op.Rd);
+            Operand vn = GetVec(op.Rn);
+            Operand vm = GetVec(op.Rm);
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            int index = op.Index;
+
+            // The element index is replicated into every selector field of the shuffle immediate.
+            int broadcastMask = isSingle
+                ? index | index << 2 | index << 4 | index << 6
+                : index | index << 1;
+
+            Intrinsic shuffle  = isSingle ? Intrinsic.X86Shufps : Intrinsic.X86Shufpd;
+            Intrinsic multiply = isSingle ? Intrinsic.X86Mulps  : Intrinsic.X86Mulpd;
+            Intrinsic subtract = isSingle ? Intrinsic.X86Subps  : Intrinsic.X86Subpd;
+
+            Operand result = context.AddIntrinsic(shuffle, vm, vm, Const(broadcastMask));
+
+            result = context.AddIntrinsic(multiply, vn, result);
+            result = context.AddIntrinsic(subtract, vd, result);
+
+            if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar multiply-subtract with a separate addend register: Rd = Ra - (Rn * Rm).
+        // FastFP + SSE2: emits the scalar multiply and scalar subtract intrinsics, then clears
+        // the bits above the scalar (upper 96 bits when op.Size == 0, upper 64 bits otherwise).
+        // Otherwise: SoftFloat FPMulSub call.
+        public static void Fmsub_S(ArmEmitterContext context) // Fused.
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitScalarTernaryRaOpF(context, (minuend, lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, minuend, lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand va = GetVec(op.Ra);
+            Operand vn = GetVec(op.Rn);
+            Operand vm = GetVec(op.Rm);
+
+            bool isSingle = op.Size == 0;
+
+            Operand product = context.AddIntrinsic(isSingle ? Intrinsic.X86Mulss : Intrinsic.X86Mulsd, vn, vm);
+
+            Operand difference = context.AddIntrinsic(isSingle ? Intrinsic.X86Subss : Intrinsic.X86Subsd, va, product);
+
+            context.Copy(GetVec(op.Rd), isSingle
+                ? context.VectorZeroUpper96(difference)
+                : context.VectorZeroUpper64(difference));
+        }
+
+        // Scalar floating-point multiply.
+        // FastFP disabled: SoftFloat FPMul call.
+        // FastFP enabled: X86Mulss/X86Mulsd when SSE2 is available, host IR multiply otherwise.
+        public static void Fmul_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP)
+            {
+                EmitScalarBinaryOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, lhs, rhs));
+            }
+            else if (Optimizations.UseSse2)
+            {
+                EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
+            }
+            else
+            {
+                EmitScalarBinaryOpF(context, (lhs, rhs) =>
+                {
+                    return context.Multiply(lhs, rhs);
+                });
+            }
+        }
+
+        // Scalar floating-point multiply by element.
+        // FastFP enabled: emits the host IR multiply.
+        // FastFP disabled: calls the SoftFloat FPMul routine, the same fallback that Fmul_S,
+        // Fmul_V and Fmul_Ve use, so the scalar-by-element form honors the setting as well.
+        public static void Fmul_Se(ArmEmitterContext context)
+        {
+            if (Optimizations.FastFP)
+            {
+                EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2));
+            }
+            else
+            {
+                EmitScalarBinaryOpByElemF(context, (op1, op2) =>
+                {
+                    return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2);
+                });
+            }
+        }
+
+        // Vector floating-point multiply.
+        // FastFP disabled: per-element SoftFloat FPMul calls.
+        // FastFP enabled: X86Mulps/X86Mulpd when SSE2 is available, host IR multiply otherwise.
+        public static void Fmul_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP)
+            {
+                EmitVectorBinaryOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, lhs, rhs));
+            }
+            else if (Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
+            }
+            else
+            {
+                EmitVectorBinaryOpF(context, (lhs, rhs) =>
+                {
+                    return context.Multiply(lhs, rhs);
+                });
+            }
+        }
+
+        // Vector floating-point multiply by element: Rd = Rn * Rm[Index].
+        // FastFP disabled: per-element SoftFloat FPMul calls.
+        // FastFP enabled without SSE2: host IR multiply per element.
+        // FastFP enabled with SSE2: Rm is shuffled with an immediate built from op.Index and
+        // multiplied with the packed multiply intrinsic. For the 32-bit element form with a
+        // 64-bit register size the upper 64 bits of the result are cleared.
+        public static void Fmul_Ve(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP)
+            {
+                EmitVectorBinaryOpByElemF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, lhs, rhs));
+
+                return;
+            }
+
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpByElemF(context, (lhs, rhs) =>
+                {
+                    return context.Multiply(lhs, rhs);
+                });
+
+                return;
+            }
+
+            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
+
+            Operand vn = GetVec(op.Rn);
+            Operand vm = GetVec(op.Rm);
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            int index = op.Index;
+
+            // The element index is replicated into every selector field of the shuffle immediate.
+            int broadcastMask = isSingle
+                ? index | index << 2 | index << 4 | index << 6
+                : index | index << 1;
+
+            Intrinsic shuffle  = isSingle ? Intrinsic.X86Shufps : Intrinsic.X86Shufpd;
+            Intrinsic multiply = isSingle ? Intrinsic.X86Mulps  : Intrinsic.X86Mulpd;
+
+            Operand result = context.AddIntrinsic(shuffle, vm, vm, Const(broadcastMask));
+
+            result = context.AddIntrinsic(multiply, vn, result);
+
+            if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar FMULX: always routed through the SoftFloat FPMulX routine.
+        public static void Fmulx_S(ArmEmitterContext context)
+        {
+            EmitScalarBinaryOpF(context, (lhs, rhs) =>
+                EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, lhs, rhs));
+        }
+
+        // Scalar FMULX by element: always routed through the SoftFloat FPMulX routine.
+        public static void Fmulx_Se(ArmEmitterContext context)
+        {
+            EmitScalarBinaryOpByElemF(context, (lhs, rhs) =>
+                EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, lhs, rhs));
+        }
+
+        // Vector FMULX: every element is routed through the SoftFloat FPMulX routine.
+        public static void Fmulx_V(ArmEmitterContext context)
+        {
+            EmitVectorBinaryOpF(context, (lhs, rhs) =>
+                EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, lhs, rhs));
+        }
+
+        // Vector FMULX by element: every element is routed through the SoftFloat FPMulX routine.
+        public static void Fmulx_Ve(ArmEmitterContext context)
+        {
+            EmitVectorBinaryOpByElemF(context, (lhs, rhs) =>
+                EmitSoftFloatCall(context, SoftFloat32.FPMulX, SoftFloat64.FPMulX, lhs, rhs));
+        }
+
+        // Scalar floating-point negate.
+        // With SSE2: XORs Rn with a scalar -0 constant (X86Xorps for op.Size == 0, X86Xorpd
+        // otherwise) and clears the bits above the scalar before writing Rd.
+        // Without SSE2: host IR negate.
+        public static void Fneg_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitScalarUnaryOpF(context, (value) =>
+                {
+                    return context.Negate(value);
+                });
+
+                return;
+            }
+
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            bool isSingle = op.Size == 0;
+
+            Operand signMask = isSingle
+                ? X86GetScalar(context, -0f)
+                : X86GetScalar(context, -0d);
+
+            Operand flipped = context.AddIntrinsic(isSingle ? Intrinsic.X86Xorps : Intrinsic.X86Xorpd, signMask, GetVec(op.Rn));
+
+            context.Copy(GetVec(op.Rd), isSingle
+                ? context.VectorZeroUpper96(flipped)
+                : context.VectorZeroUpper64(flipped));
+        }
+
+        // Vector floating-point negate.
+        // With SSE2: XORs Rn with a vector whose elements are all -0 (X86Xorps or X86Xorpd,
+        // chosen by bit 0 of the size field). For the 32-bit element form with a 64-bit
+        // register size the upper 64 bits of the result are cleared.
+        // Without SSE2: host IR negate per element.
+        public static void Fneg_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorUnaryOpF(context, (value) =>
+                {
+                    return context.Negate(value);
+                });
+
+                return;
+            }
+
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            Operand signMask = isSingle
+                ? X86GetAllElements(context, -0f)
+                : X86GetAllElements(context, -0d);
+
+            Operand flipped = context.AddIntrinsic(isSingle ? Intrinsic.X86Xorps : Intrinsic.X86Xorpd, signMask, GetVec(op.Rn));
+
+            if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+            {
+                flipped = context.VectorZeroUpper64(flipped);
+            }
+
+            context.Copy(GetVec(op.Rd), flipped);
+        }
+
+        // Scalar negated multiply-add: Rd = (-Rn * Rm) - Ra, using element 0 of each source.
+        // Built from host IR negate, multiply and subtract; the result is inserted into
+        // element 0 of a zeroed vector.
+        public static void Fnmadd_S(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            OperandType elemType;
+
+            if ((op.Size & 1) == 0)
+            {
+                elemType = OperandType.FP32;
+            }
+            else
+            {
+                elemType = OperandType.FP64;
+            }
+
+            Operand n = context.VectorExtract(elemType, GetVec(op.Rn), 0);
+            Operand m = context.VectorExtract(elemType, GetVec(op.Rm), 0);
+            Operand a = context.VectorExtract(elemType, GetVec(op.Ra), 0);
+
+            Operand negatedN = context.Negate(n);
+            Operand product  = context.Multiply(negatedN, m);
+            Operand result   = context.Subtract(product, a);
+
+            Operand packed = context.VectorInsert(context.VectorZero(), result, 0);
+
+            context.Copy(GetVec(op.Rd), packed);
+        }
+
+        // Scalar negated multiply-subtract: Rd = (Rn * Rm) - Ra, using element 0 of each source.
+        // Built from host IR multiply and subtract; the result is inserted into element 0 of
+        // a zeroed vector.
+        public static void Fnmsub_S(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            OperandType elemType;
+
+            if ((op.Size & 1) == 0)
+            {
+                elemType = OperandType.FP32;
+            }
+            else
+            {
+                elemType = OperandType.FP64;
+            }
+
+            Operand n = context.VectorExtract(elemType, GetVec(op.Rn), 0);
+            Operand m = context.VectorExtract(elemType, GetVec(op.Rm), 0);
+            Operand a = context.VectorExtract(elemType, GetVec(op.Ra), 0);
+
+            Operand product = context.Multiply(n, m);
+            Operand result  = context.Subtract(product, a);
+
+            Operand packed = context.VectorInsert(context.VectorZero(), result, 0);
+
+            context.Copy(GetVec(op.Rd), packed);
+        }
+
+        // Scalar negated multiply: result = -(op1 * op2), built from host IR multiply and negate.
+        public static void Fnmul_S(ArmEmitterContext context)
+        {
+            EmitScalarBinaryOpF(context, (lhs, rhs) =>
+            {
+                Operand product = context.Multiply(lhs, rhs);
+
+                return context.Negate(product);
+            });
+        }
+
+        // Scalar reciprocal estimate.
+        // The X86Rcpss intrinsic is used only when FastFP and SSE are enabled and bit 0 of the
+        // size field is clear; every other case calls SoftFloat FPRecipEstimate.
+        public static void Frecpe_S(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            if (!Optimizations.FastFP || !Optimizations.UseSse || !isSingle)
+            {
+                EmitScalarUnaryOpF(context, (value) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, value));
+
+                return;
+            }
+
+            EmitScalarUnaryOpF(context, Intrinsic.X86Rcpss, 0);
+        }
+
+        // Vector reciprocal estimate.
+        // The X86Rcpps intrinsic is used only when FastFP and SSE are enabled and bit 0 of the
+        // size field is clear; every other case calls SoftFloat FPRecipEstimate per element.
+        public static void Frecpe_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            if (!Optimizations.FastFP || !Optimizations.UseSse || !isSingle)
+            {
+                EmitVectorUnaryOpF(context, (value) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPRecipEstimate, SoftFloat64.FPRecipEstimate, value));
+
+                return;
+            }
+
+            EmitVectorUnaryOpF(context, Intrinsic.X86Rcpps, 0);
+        }
+
+        // Scalar reciprocal step: Rd = 2 - (Rn * Rm).
+        // FastFP + SSE2: scalar multiply and subtract intrinsics against a constant 2, then the
+        // bits above the scalar are cleared.
+        // Otherwise: SoftFloat FPRecipStepFused call.
+        public static void Frecps_S(ArmEmitterContext context) // Fused.
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitScalarBinaryOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            Operand two = isSingle
+                ? X86GetScalar(context, 2f)
+                : X86GetScalar(context, 2d);
+
+            Operand product = context.AddIntrinsic(isSingle ? Intrinsic.X86Mulss : Intrinsic.X86Mulsd, GetVec(op.Rn), GetVec(op.Rm));
+
+            Operand step = context.AddIntrinsic(isSingle ? Intrinsic.X86Subss : Intrinsic.X86Subsd, two, product);
+
+            context.Copy(GetVec(op.Rd), isSingle
+                ? context.VectorZeroUpper96(step)
+                : context.VectorZeroUpper64(step));
+        }
+
+        // Vector reciprocal step: Rd = 2 - (Rn * Rm), per element.
+        // FastFP + SSE2: packed multiply and subtract intrinsics against an all-elements
+        // constant 2. For the 32-bit element form with a 64-bit register size the upper
+        // 64 bits of the result are cleared.
+        // Otherwise: per-element SoftFloat FPRecipStepFused calls.
+        public static void Frecps_V(ArmEmitterContext context) // Fused.
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPRecipStepFused, SoftFloat64.FPRecipStepFused, lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            Operand two = isSingle
+                ? X86GetAllElements(context, 2f)
+                : X86GetAllElements(context, 2d);
+
+            Operand product = context.AddIntrinsic(isSingle ? Intrinsic.X86Mulps : Intrinsic.X86Mulpd, GetVec(op.Rn), GetVec(op.Rm));
+
+            Operand step = context.AddIntrinsic(isSingle ? Intrinsic.X86Subps : Intrinsic.X86Subpd, two, product);
+
+            if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+            {
+                step = context.VectorZeroUpper64(step);
+            }
+
+            context.Copy(GetVec(op.Rd), step);
+        }
+
+        // Scalar FRECPX: always routed through the SoftFloat FPRecpX routine.
+        public static void Frecpx_S(ArmEmitterContext context)
+        {
+            EmitScalarUnaryOpF(context, (value) =>
+                EmitSoftFloatCall(context, SoftFloat32.FPRecpX, SoftFloat64.FPRecpX, value));
+        }
+
+        // Scalar round to integral, midpoints rounded away from zero (via EmitRoundMathCall).
+        public static void Frinta_S(ArmEmitterContext context)
+        {
+            EmitScalarUnaryOpF(context, (value) =>
+                EmitRoundMathCall(context, MidpointRounding.AwayFromZero, value));
+        }
+
+        // Vector round to integral, midpoints rounded away from zero (via EmitRoundMathCall).
+        public static void Frinta_V(ArmEmitterContext context)
+        {
+            EmitVectorUnaryOpF(context, (value) =>
+                EmitRoundMathCall(context, MidpointRounding.AwayFromZero, value));
+        }
+
+        // Scalar FRINTI: delegates the rounding to SoftFallback.RoundF when op.Size == 0 and
+        // to SoftFallback.Round otherwise.
+        public static void Frinti_S(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            EmitScalarUnaryOpF(context, (value) =>
+            {
+                return op.Size == 0
+                    ? context.Call(new _F32_F32(SoftFallback.RoundF), value)
+                    : context.Call(new _F64_F64(SoftFallback.Round), value);
+            });
+        }
+
+        // Vector FRINTI: each element is rounded by SoftFallback.RoundF when bit 0 of the size
+        // field is clear and by SoftFallback.Round otherwise.
+        public static void Frinti_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            EmitVectorUnaryOpF(context, (value) =>
+            {
+                return isSingle
+                    ? context.Call(new _F32_F32(SoftFallback.RoundF), value)
+                    : context.Call(new _F64_F64(SoftFallback.Round), value);
+            });
+        }
+
+        // Scalar round towards minus infinity.
+        // With SSE4.1 the rounding helper is used; otherwise MathF.Floor/Math.Floor is called.
+        public static void Frintm_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitScalarUnaryOpF(context, (value) =>
+                    EmitUnaryMathCall(context, MathF.Floor, Math.Floor, value));
+
+                return;
+            }
+
+            EmitScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
+        }
+
+        // Vector round towards minus infinity.
+        // With SSE4.1 the rounding helper is used; otherwise MathF.Floor/Math.Floor is called
+        // for each element.
+        public static void Frintm_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorUnaryOpF(context, (value) =>
+                    EmitUnaryMathCall(context, MathF.Floor, Math.Floor, value));
+
+                return;
+            }
+
+            EmitVectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
+        }
+
+        // Scalar round to nearest.
+        // With SSE4.1 the rounding helper is used; otherwise EmitRoundMathCall is invoked with
+        // MidpointRounding.ToEven.
+        public static void Frintn_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitScalarUnaryOpF(context, (value) =>
+                    EmitRoundMathCall(context, MidpointRounding.ToEven, value));
+
+                return;
+            }
+
+            EmitScalarRoundOpF(context, FPRoundingMode.ToNearest);
+        }
+
+        // Vector round to nearest.
+        // With SSE4.1 the rounding helper is used; otherwise EmitRoundMathCall is invoked with
+        // MidpointRounding.ToEven for each element.
+        public static void Frintn_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorUnaryOpF(context, (value) =>
+                    EmitRoundMathCall(context, MidpointRounding.ToEven, value));
+
+                return;
+            }
+
+            EmitVectorRoundOpF(context, FPRoundingMode.ToNearest);
+        }
+
+        // Scalar round towards plus infinity.
+        // With SSE4.1 the rounding helper is used; otherwise MathF.Ceiling/Math.Ceiling is called.
+        public static void Frintp_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitScalarUnaryOpF(context, (value) =>
+                    EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, value));
+
+                return;
+            }
+
+            EmitScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
+        }
+
+        // Vector round towards plus infinity.
+        // With SSE4.1 the rounding helper is used; otherwise MathF.Ceiling/Math.Ceiling is
+        // called for each element.
+        public static void Frintp_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorUnaryOpF(context, (value) =>
+                    EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, value));
+
+                return;
+            }
+
+            EmitVectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
+        }
+
+        // Scalar FRINTX: delegates the rounding to SoftFallback.RoundF when op.Size == 0 and
+        // to SoftFallback.Round otherwise.
+        public static void Frintx_S(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            EmitScalarUnaryOpF(context, (value) =>
+            {
+                return op.Size == 0
+                    ? context.Call(new _F32_F32(SoftFallback.RoundF), value)
+                    : context.Call(new _F64_F64(SoftFallback.Round), value);
+            });
+        }
+
+        // Vector FRINTX: each element is rounded by SoftFallback.RoundF when bit 0 of the size
+        // field is clear and by SoftFallback.Round otherwise.
+        public static void Frintx_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            EmitVectorUnaryOpF(context, (value) =>
+            {
+                return isSingle
+                    ? context.Call(new _F32_F32(SoftFallback.RoundF), value)
+                    : context.Call(new _F64_F64(SoftFallback.Round), value);
+            });
+        }
+
+        // Scalar round towards zero.
+        // With SSE4.1 the rounding helper is used; otherwise MathF.Truncate/Math.Truncate is called.
+        public static void Frintz_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitScalarUnaryOpF(context, (value) =>
+                    EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, value));
+
+                return;
+            }
+
+            EmitScalarRoundOpF(context, FPRoundingMode.TowardsZero);
+        }
+
+        // Vector round towards zero.
+        // With SSE4.1 the rounding helper is used; otherwise MathF.Truncate/Math.Truncate is
+        // called for each element.
+        public static void Frintz_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorUnaryOpF(context, (value) =>
+                    EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, value));
+
+                return;
+            }
+
+            EmitVectorRoundOpF(context, FPRoundingMode.TowardsZero);
+        }
+
+        // Scalar reciprocal square root estimate.
+        // The X86Rsqrtss intrinsic is used only when FastFP and SSE are enabled and bit 0 of
+        // the size field is clear; every other case calls SoftFloat FPRSqrtEstimate.
+        public static void Frsqrte_S(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            if (!Optimizations.FastFP || !Optimizations.UseSse || !isSingle)
+            {
+                EmitScalarUnaryOpF(context, (value) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPRSqrtEstimate, SoftFloat64.FPRSqrtEstimate, value));
+
+                return;
+            }
+
+            EmitScalarUnaryOpF(context, Intrinsic.X86Rsqrtss, 0);
+        }
+
+        // Vector reciprocal square root estimate.
+        // The X86Rsqrtps intrinsic is used only when FastFP and SSE are enabled and bit 0 of
+        // the size field is clear; every other case calls SoftFloat FPRSqrtEstimate per element.
+        public static void Frsqrte_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            if (!Optimizations.FastFP || !Optimizations.UseSse || !isSingle)
+            {
+                EmitVectorUnaryOpF(context, (value) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPRSqrtEstimate, SoftFloat64.FPRSqrtEstimate, value));
+
+                return;
+            }
+
+            EmitVectorUnaryOpF(context, Intrinsic.X86Rsqrtps, 0);
+        }
+
+        // Scalar reciprocal square root step: Rd = 0.5 * (3 - (Rn * Rm)).
+        // FastFP + SSE2: scalar multiply/subtract/multiply intrinsics against constants 3 and
+        // 0.5, then the bits above the scalar are cleared.
+        // Otherwise: SoftFloat FPRSqrtStepFused call.
+        public static void Frsqrts_S(ArmEmitterContext context) // Fused.
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitScalarBinaryOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPRSqrtStepFused, SoftFloat64.FPRSqrtStepFused, lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            Operand half  = isSingle ? X86GetScalar(context, 0.5f) : X86GetScalar(context, 0.5d);
+            Operand three = isSingle ? X86GetScalar(context, 3f)   : X86GetScalar(context, 3d);
+
+            Intrinsic multiply = isSingle ? Intrinsic.X86Mulss : Intrinsic.X86Mulsd;
+            Intrinsic subtract = isSingle ? Intrinsic.X86Subss : Intrinsic.X86Subsd;
+
+            Operand step = context.AddIntrinsic(multiply, GetVec(op.Rn), GetVec(op.Rm));
+
+            step = context.AddIntrinsic(subtract, three, step);
+            step = context.AddIntrinsic(multiply, half, step);
+
+            context.Copy(GetVec(op.Rd), isSingle
+                ? context.VectorZeroUpper96(step)
+                : context.VectorZeroUpper64(step));
+        }
+
+        // Vector reciprocal square root step: Rd = 0.5 * (3 - (Rn * Rm)), per element.
+        // FastFP + SSE2: packed multiply/subtract/multiply intrinsics against all-elements
+        // constants 3 and 0.5. For the 32-bit element form with a 64-bit register size the
+        // upper 64 bits of the result are cleared.
+        // Otherwise: per-element SoftFloat FPRSqrtStepFused calls.
+        public static void Frsqrts_V(ArmEmitterContext context) // Fused.
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpF(context, (lhs, rhs) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPRSqrtStepFused, SoftFloat64.FPRSqrtStepFused, lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            Operand half  = isSingle ? X86GetAllElements(context, 0.5f) : X86GetAllElements(context, 0.5d);
+            Operand three = isSingle ? X86GetAllElements(context, 3f)   : X86GetAllElements(context, 3d);
+
+            Intrinsic multiply = isSingle ? Intrinsic.X86Mulps : Intrinsic.X86Mulpd;
+            Intrinsic subtract = isSingle ? Intrinsic.X86Subps : Intrinsic.X86Subpd;
+
+            Operand step = context.AddIntrinsic(multiply, GetVec(op.Rn), GetVec(op.Rm));
+
+            step = context.AddIntrinsic(subtract, three, step);
+            step = context.AddIntrinsic(multiply, half, step);
+
+            if (isSingle && op.RegisterSize == RegisterSize.Simd64)
+            {
+                step = context.VectorZeroUpper64(step);
+            }
+
+            context.Copy(GetVec(op.Rd), step);
+        }
+
+        // Scalar floating-point square root.
+        // FastFP + SSE2: X86Sqrtss/X86Sqrtsd intrinsics; otherwise SoftFloat FPSqrt call.
+        public static void Fsqrt_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitScalarUnaryOpF(context, (value) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPSqrt, SoftFloat64.FPSqrt, value));
+
+                return;
+            }
+
+            EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
+        }
+
+        // Vector floating-point square root.
+        // FastFP + SSE2: X86Sqrtps/X86Sqrtpd intrinsics; otherwise per-element SoftFloat
+        // FPSqrt calls.
+        public static void Fsqrt_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitVectorUnaryOpF(context, (value) =>
+                    EmitSoftFloatCall(context, SoftFloat32.FPSqrt, SoftFloat64.FPSqrt, value));
+
+                return;
+            }
+
+            EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd);
+        }
+
+        // Scalar floating-point subtract. Three strategies, in order of preference:
+        //   FastFP + UseSse2 -> X86Subss/X86Subsd intrinsics,
+        //   FastFP only      -> IR Subtract,
+        //   otherwise        -> SoftFloat FPSub call.
+        public static void Fsub_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP)
+            {
+                EmitScalarBinaryOpF(context, (lhs, rhs) => EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, lhs, rhs));
+
+                return;
+            }
+
+            if (Optimizations.UseSse2)
+            {
+                EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+            }
+            else
+            {
+                EmitScalarBinaryOpF(context, (lhs, rhs) =>
+                {
+                    return context.Subtract(lhs, rhs);
+                });
+            }
+        }
+
+        // Vector floating-point subtract. Three strategies, in order of preference:
+        //   FastFP + UseSse2 -> X86Subps/X86Subpd intrinsics,
+        //   FastFP only      -> IR Subtract,
+        //   otherwise        -> SoftFloat FPSub call.
+        public static void Fsub_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP)
+            {
+                EmitVectorBinaryOpF(context, (lhs, rhs) => EmitSoftFloatCall(context, SoftFloat32.FPSub, SoftFloat64.FPSub, lhs, rhs));
+
+                return;
+            }
+
+            if (Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+            }
+            else
+            {
+                EmitVectorBinaryOpF(context, (lhs, rhs) =>
+                {
+                    return context.Subtract(lhs, rhs);
+                });
+            }
+        }
+
+        // Vector multiply-accumulate. Delegates to EmitSse41Mul_AddSub(AddSub.Add) when
+        // UseSse41 is set; otherwise emits addend + (lhs * rhs) per element.
+        public static void Mla_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorTernaryOpZx(context, (addend, lhs, rhs) =>
+                {
+                    Operand product = context.Multiply(lhs, rhs);
+
+                    return context.Add(addend, product);
+                });
+
+                return;
+            }
+
+            EmitSse41Mul_AddSub(context, AddSub.Add);
+        }
+
+        // By-element multiply-accumulate: emits addend + (lhs * rhs) through
+        // EmitVectorTernaryOpByElemZx.
+        public static void Mla_Ve(ArmEmitterContext context)
+        {
+            EmitVectorTernaryOpByElemZx(context, (addend, lhs, rhs) =>
+            {
+                Operand product = context.Multiply(lhs, rhs);
+
+                return context.Add(addend, product);
+            });
+        }
+
+        // Vector multiply-subtract. Delegates to EmitSse41Mul_AddSub(AddSub.Subtract) when
+        // UseSse41 is set; otherwise emits minuend - (lhs * rhs) per element.
+        public static void Mls_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorTernaryOpZx(context, (minuend, lhs, rhs) =>
+                {
+                    Operand product = context.Multiply(lhs, rhs);
+
+                    return context.Subtract(minuend, product);
+                });
+
+                return;
+            }
+
+            EmitSse41Mul_AddSub(context, AddSub.Subtract);
+        }
+
+        // By-element multiply-subtract: emits minuend - (lhs * rhs) through
+        // EmitVectorTernaryOpByElemZx.
+        public static void Mls_Ve(ArmEmitterContext context)
+        {
+            EmitVectorTernaryOpByElemZx(context, (minuend, lhs, rhs) =>
+            {
+                Operand product = context.Multiply(lhs, rhs);
+
+                return context.Subtract(minuend, product);
+            });
+        }
+
+        // Vector multiply. Delegates to EmitSse41Mul_AddSub(AddSub.None) when UseSse41 is
+        // set; otherwise emits an IR Multiply per element.
+        public static void Mul_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorBinaryOpZx(context, (lhs, rhs) =>
+                {
+                    return context.Multiply(lhs, rhs);
+                });
+
+                return;
+            }
+
+            EmitSse41Mul_AddSub(context, AddSub.None);
+        }
+
+        // By-element multiply: emits an IR Multiply through EmitVectorBinaryOpByElemZx.
+        public static void Mul_Ve(ArmEmitterContext context)
+        {
+            EmitVectorBinaryOpByElemZx(context, (lhs, rhs) =>
+            {
+                return context.Multiply(lhs, rhs);
+            });
+        }
+
+        // Scalar negate: emits an IR Negate through EmitScalarUnaryOpSx.
+        public static void Neg_S(ArmEmitterContext context)
+        {
+            EmitScalarUnaryOpSx(context, (value) =>
+            {
+                return context.Negate(value);
+            });
+        }
+
+        // Vector negate. With UseSse2 the result is computed as (zero vector - Rn) using
+        // the subtract intrinsic selected by element size; otherwise an IR Negate is
+        // emitted per element.
+        public static void Neg_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorUnaryOpSx(context, (value) =>
+                {
+                    return context.Negate(value);
+                });
+
+                return;
+            }
+
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand negated = context.AddIntrinsic(X86PsubInstruction[op.Size], context.VectorZero(), GetVec(op.Rn));
+
+            // 64-bit register form: clear the upper half of the result.
+            bool is64BitForm = op.RegisterSize == RegisterSize.Simd64;
+
+            if (is64BitForm)
+            {
+                negated = context.VectorZeroUpper64(negated);
+            }
+
+            context.Copy(GetVec(op.Rd), negated);
+        }
+
+        // High-narrow add with rounding enabled (round: true), via EmitHighNarrow.
+        public static void Raddhn_V(ArmEmitterContext context)
+        {
+            EmitHighNarrow(context, (lhs, rhs) =>
+            {
+                return context.Add(lhs, rhs);
+            }, round: true);
+        }
+
+        // High-narrow subtract with rounding enabled (round: true), via EmitHighNarrow.
+        public static void Rsubhn_V(ArmEmitterContext context)
+        {
+            EmitHighNarrow(context, (lhs, rhs) =>
+            {
+                return context.Subtract(lhs, rhs);
+            }, round: true);
+        }
+
+        // Absolute difference and accumulate on sign-extended elements:
+        // acc + |lhs - rhs|, via EmitVectorTernaryOpSx.
+        public static void Saba_V(ArmEmitterContext context)
+        {
+            EmitVectorTernaryOpSx(context, (acc, lhs, rhs) =>
+            {
+                Operand difference = context.Subtract(lhs, rhs);
+                Operand magnitude  = EmitAbs(context, difference);
+
+                return context.Add(acc, magnitude);
+            });
+        }
+
+        // Widening absolute difference and accumulate on sign-extended elements:
+        // acc + |lhs - rhs|, via EmitVectorWidenRnRmTernaryOpSx.
+        public static void Sabal_V(ArmEmitterContext context)
+        {
+            EmitVectorWidenRnRmTernaryOpSx(context, (acc, lhs, rhs) =>
+            {
+                Operand difference = context.Subtract(lhs, rhs);
+                Operand magnitude  = EmitAbs(context, difference);
+
+                return context.Add(acc, magnitude);
+            });
+        }
+
+        // Signed absolute difference (vector).
+        //
+        // The fast path goes through EmitSse41Sabd, so it is gated on UseSse41, the same
+        // flag Sabdl_V, Uabd_V and Uabdl_V check before calling their Sse41 helpers.
+        // Gating on UseSse2 alone could select this path when only SSE2 is enabled.
+        //
+        // Fallback: |op1 - op2| per sign-extended element via EmitVectorBinaryOpSx.
+        public static void Sabd_V(ArmEmitterContext context)
+        {
+            if (Optimizations.UseSse41)
+            {
+                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+                Operand n = GetVec(op.Rn);
+                Operand m = GetVec(op.Rm);
+
+                EmitSse41Sabd(context, op, n, m, isLong: false);
+            }
+            else
+            {
+                EmitVectorBinaryOpSx(context, (op1, op2) =>
+                {
+                    return EmitAbs(context, context.Subtract(op1, op2));
+                });
+            }
+        }
+
+        // Signed absolute difference long. The SSE4.1 path is only taken for element
+        // sizes 0 and 1: both sources are sign-extended with Pmovsxbw/Pmovsxwd and then
+        // handed to EmitSse41Sabd. For the 128-bit register form the sources are first
+        // shifted right by 8 bytes (Psrldq), so the upper half is the part that is widened.
+        public static void Sabdl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse41 || op.Size >= 2)
+            {
+                EmitVectorWidenRnRmBinaryOpSx(context, (lhs, rhs) =>
+                {
+                    Operand difference = context.Subtract(lhs, rhs);
+
+                    return EmitAbs(context, difference);
+                });
+
+                return;
+            }
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+                m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+            }
+
+            Intrinsic signExtendInst;
+
+            if (op.Size == 0)
+            {
+                signExtendInst = Intrinsic.X86Pmovsxbw;
+            }
+            else
+            {
+                signExtendInst = Intrinsic.X86Pmovsxwd;
+            }
+
+            n = context.AddIntrinsic(signExtendInst, n);
+            m = context.AddIntrinsic(signExtendInst, m);
+
+            EmitSse41Sabd(context, op, n, m, isLong: true);
+        }
+
+        // Signed add-long-pairwise with accumulation, via EmitAddLongPairwise.
+        public static void Sadalp_V(ArmEmitterContext context)
+            => EmitAddLongPairwise(context, accumulate: true, signed: true);
+
+        // Signed add long. With UseSse41 both sources are sign-extended
+        // (X86PmovsxInstruction[size]) and added with the next-wider add
+        // (X86PaddInstruction[size + 1]); for the 128-bit register form the sources are
+        // first shifted right by 8 bytes. Otherwise falls back to
+        // EmitVectorWidenRnRmBinaryOpSx with an IR Add.
+        public static void Saddl_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorWidenRnRmBinaryOpSx(context, (lhs, rhs) =>
+                {
+                    return context.Add(lhs, rhs);
+                });
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            bool usesUpperHalf = op.RegisterSize == RegisterSize.Simd128;
+
+            if (usesUpperHalf)
+            {
+                n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+                m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+            }
+
+            Intrinsic signExtendInst = X86PmovsxInstruction[op.Size];
+
+            n = context.AddIntrinsic(signExtendInst, n);
+            m = context.AddIntrinsic(signExtendInst, m);
+
+            Operand sum = context.AddIntrinsic(X86PaddInstruction[op.Size + 1], n, m);
+
+            context.Copy(GetVec(op.Rd), sum);
+        }
+
+        // Signed add-long-pairwise without accumulation, via EmitAddLongPairwise.
+        public static void Saddlp_V(ArmEmitterContext context)
+            => EmitAddLongPairwise(context, accumulate: false, signed: true);
+
+        // Signed add-long across vector: passes an IR Add to
+        // EmitVectorLongAcrossVectorOpSx.
+        public static void Saddlv_V(ArmEmitterContext context)
+        {
+            EmitVectorLongAcrossVectorOpSx(context, (lhs, rhs) =>
+            {
+                return context.Add(lhs, rhs);
+            });
+        }
+
+        // Signed add wide. With UseSse41 only Rm is sign-extended
+        // (X86PmovsxInstruction[size]) and then added to Rn with the next-wider add
+        // (X86PaddInstruction[size + 1]); for the 128-bit register form Rm is first
+        // shifted right by 8 bytes. Otherwise falls back to EmitVectorWidenRmBinaryOpSx.
+        public static void Saddw_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorWidenRmBinaryOpSx(context, (lhs, rhs) =>
+                {
+                    return context.Add(lhs, rhs);
+                });
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand wide   = GetVec(op.Rn);
+            Operand narrow = GetVec(op.Rm);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                narrow = context.AddIntrinsic(Intrinsic.X86Psrldq, narrow, Const(8));
+            }
+
+            narrow = context.AddIntrinsic(X86PmovsxInstruction[op.Size], narrow);
+
+            Operand sum = context.AddIntrinsic(X86PaddInstruction[op.Size + 1], wide, narrow);
+
+            context.Copy(GetVec(op.Rd), sum);
+        }
+
+        // Signed halving add. The SSE2 path (element size > 0 only) computes
+        // (n & m) + ((n ^ m) >> 1) using Psraw/Psrad for the shift, which avoids forming
+        // the full-width sum. The fallback emits (op1 + op2) >> 1 with a signed shift on
+        // sign-extended elements.
+        public static void Shadd_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse2 || op.Size <= 0)
+            {
+                EmitVectorBinaryOpSx(context, (lhs, rhs) =>
+                {
+                    Operand sum = context.Add(lhs, rhs);
+
+                    return context.ShiftRightSI(sum, Const(1));
+                });
+
+                return;
+            }
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            Operand commonBits = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
+            Operand halfDiff   = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+            Intrinsic shiftInst;
+
+            if (op.Size == 1)
+            {
+                shiftInst = Intrinsic.X86Psraw;
+            }
+            else
+            {
+                shiftInst = Intrinsic.X86Psrad;
+            }
+
+            halfDiff = context.AddIntrinsic(shiftInst, halfDiff, Const(1));
+
+            Operand result = context.AddIntrinsic(X86PaddInstruction[op.Size], commonBits, halfDiff);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Signed halving subtract. The SSE2 path (element sizes 0 and 1 only) adds a
+        // 0x80 / 0x8000 per-element bias to both inputs, averages them with Pavgb/Pavgw,
+        // and subtracts that average from the biased n. The fallback emits
+        // (op1 - op2) >> 1 with a signed shift on sign-extended elements.
+        public static void Shsub_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse2 || op.Size >= 2)
+            {
+                EmitVectorBinaryOpSx(context, (lhs, rhs) =>
+                {
+                    Operand difference = context.Subtract(lhs, rhs);
+
+                    return context.ShiftRightSI(difference, Const(1));
+                });
+
+                return;
+            }
+
+            bool isByteSized = op.Size == 0;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            uint biasPattern = isByteSized ? 0x80808080u : 0x80008000u;
+
+            Operand bias = X86GetAllElements(context, (int)biasPattern);
+
+            Intrinsic addInst = X86PaddInstruction[op.Size];
+
+            Operand biasedN = context.AddIntrinsic(addInst, n, bias);
+            Operand biasedM = context.AddIntrinsic(addInst, m, bias);
+
+            Intrinsic avgInst;
+
+            if (isByteSized)
+            {
+                avgInst = Intrinsic.X86Pavgb;
+            }
+            else
+            {
+                avgInst = Intrinsic.X86Pavgw;
+            }
+
+            Operand average = context.AddIntrinsic(avgInst, biasedN, biasedM);
+
+            Operand result = context.AddIntrinsic(X86PsubInstruction[op.Size], biasedN, average);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Signed vector maximum. With UseSse41 the intrinsic is picked from
+        // X86PmaxsInstruction by element size; otherwise each element pair is passed to a
+        // managed Math.Max call.
+        public static void Smax_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                Delegate maxFunc = new _S64_S64_S64(Math.Max);
+
+                EmitVectorBinaryOpSx(context, (lhs, rhs) =>
+                {
+                    return context.Call(maxFunc, lhs, rhs);
+                });
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            Operand result = context.AddIntrinsic(X86PmaxsInstruction[op.Size], n, m);
+
+            bool is64BitForm = op.RegisterSize == RegisterSize.Simd64;
+
+            if (is64BitForm)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Signed pairwise maximum: each pair is passed to a managed Math.Max call via
+        // EmitVectorPairwiseOpSx.
+        public static void Smaxp_V(ArmEmitterContext context)
+        {
+            Delegate maxFunc = new _S64_S64_S64(Math.Max);
+
+            EmitVectorPairwiseOpSx(context, (lhs, rhs) =>
+            {
+                return context.Call(maxFunc, lhs, rhs);
+            });
+        }
+
+        // Signed maximum across vector: a managed Math.Max call is used as the operation
+        // for EmitVectorAcrossVectorOpSx.
+        public static void Smaxv_V(ArmEmitterContext context)
+        {
+            Delegate maxFunc = new _S64_S64_S64(Math.Max);
+
+            EmitVectorAcrossVectorOpSx(context, (lhs, rhs) =>
+            {
+                return context.Call(maxFunc, lhs, rhs);
+            });
+        }
+
+        // Signed vector minimum. With UseSse41 the intrinsic is picked from
+        // X86PminsInstruction by element size; otherwise each element pair is passed to a
+        // managed Math.Min call.
+        public static void Smin_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                Delegate minFunc = new _S64_S64_S64(Math.Min);
+
+                EmitVectorBinaryOpSx(context, (lhs, rhs) =>
+                {
+                    return context.Call(minFunc, lhs, rhs);
+                });
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            Operand result = context.AddIntrinsic(X86PminsInstruction[op.Size], n, m);
+
+            bool is64BitForm = op.RegisterSize == RegisterSize.Simd64;
+
+            if (is64BitForm)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Signed pairwise minimum: each pair is passed to a managed Math.Min call via
+        // EmitVectorPairwiseOpSx.
+        public static void Sminp_V(ArmEmitterContext context)
+        {
+            Delegate minFunc = new _S64_S64_S64(Math.Min);
+
+            EmitVectorPairwiseOpSx(context, (lhs, rhs) =>
+            {
+                return context.Call(minFunc, lhs, rhs);
+            });
+        }
+
+        // Signed minimum across vector: a managed Math.Min call is used as the operation
+        // for EmitVectorAcrossVectorOpSx.
+        public static void Sminv_V(ArmEmitterContext context)
+        {
+            Delegate minFunc = new _S64_S64_S64(Math.Min);
+
+            EmitVectorAcrossVectorOpSx(context, (lhs, rhs) =>
+            {
+                return context.Call(minFunc, lhs, rhs);
+            });
+        }
+
+        // Signed multiply-accumulate long. The SSE4.1 path (element sizes 0 and 1 only)
+        // sign-extends both sources, multiplies them with Pmullw/Pmulld, and adds the
+        // product to Rd using the next-wider add. For the 128-bit register form the
+        // sources are first shifted right by 8 bytes.
+        public static void Smlal_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse41 || op.Size >= 2)
+            {
+                EmitVectorWidenRnRmTernaryOpSx(context, (addend, lhs, rhs) =>
+                {
+                    Operand product = context.Multiply(lhs, rhs);
+
+                    return context.Add(addend, product);
+                });
+
+                return;
+            }
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+                m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+            }
+
+            Intrinsic signExtendInst = X86PmovsxInstruction[op.Size];
+
+            n = context.AddIntrinsic(signExtendInst, n);
+            m = context.AddIntrinsic(signExtendInst, m);
+
+            Intrinsic multiplyInst;
+
+            if (op.Size == 0)
+            {
+                multiplyInst = Intrinsic.X86Pmullw;
+            }
+            else
+            {
+                multiplyInst = Intrinsic.X86Pmulld;
+            }
+
+            Operand product = context.AddIntrinsic(multiplyInst, n, m);
+
+            Operand accumulated = context.AddIntrinsic(X86PaddInstruction[op.Size + 1], d, product);
+
+            context.Copy(d, accumulated);
+        }
+
+        // By-element signed multiply-accumulate long: addend + (lhs * rhs), via
+        // EmitVectorWidenTernaryOpByElemSx.
+        public static void Smlal_Ve(ArmEmitterContext context)
+        {
+            EmitVectorWidenTernaryOpByElemSx(context, (addend, lhs, rhs) =>
+            {
+                Operand product = context.Multiply(lhs, rhs);
+
+                return context.Add(addend, product);
+            });
+        }
+
+        // Signed multiply-subtract long. The SSE4.1 path (element sizes 0 and 1 only)
+        // sign-extends both sources with Pmovsxbw/Pmovsxwd, multiplies them with
+        // Pmullw/Pmulld, and subtracts the product from Rd using the next-wider subtract.
+        // For the 128-bit register form the sources are first shifted right by 8 bytes.
+        public static void Smlsl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse41 || op.Size >= 2)
+            {
+                EmitVectorWidenRnRmTernaryOpSx(context, (minuend, lhs, rhs) =>
+                {
+                    Operand product = context.Multiply(lhs, rhs);
+
+                    return context.Subtract(minuend, product);
+                });
+
+                return;
+            }
+
+            bool isByteSized = op.Size == 0;
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+                m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+            }
+
+            Intrinsic signExtendInst;
+            Intrinsic multiplyInst;
+
+            if (isByteSized)
+            {
+                signExtendInst = Intrinsic.X86Pmovsxbw;
+                multiplyInst   = Intrinsic.X86Pmullw;
+            }
+            else
+            {
+                signExtendInst = Intrinsic.X86Pmovsxwd;
+                multiplyInst   = Intrinsic.X86Pmulld;
+            }
+
+            n = context.AddIntrinsic(signExtendInst, n);
+            m = context.AddIntrinsic(signExtendInst, m);
+
+            Operand product = context.AddIntrinsic(multiplyInst, n, m);
+
+            Operand reduced = context.AddIntrinsic(X86PsubInstruction[op.Size + 1], d, product);
+
+            context.Copy(d, reduced);
+        }
+
+        // By-element signed multiply-subtract long: minuend - (lhs * rhs), via
+        // EmitVectorWidenTernaryOpByElemSx.
+        public static void Smlsl_Ve(ArmEmitterContext context)
+        {
+            EmitVectorWidenTernaryOpByElemSx(context, (minuend, lhs, rhs) =>
+            {
+                Operand product = context.Multiply(lhs, rhs);
+
+                return context.Subtract(minuend, product);
+            });
+        }
+
+        // Signed multiply long: passes an IR Multiply to EmitVectorWidenRnRmBinaryOpSx.
+        public static void Smull_V(ArmEmitterContext context)
+        {
+            EmitVectorWidenRnRmBinaryOpSx(context, (lhs, rhs) =>
+            {
+                return context.Multiply(lhs, rhs);
+            });
+        }
+
+        // By-element signed multiply long: passes an IR Multiply to
+        // EmitVectorWidenBinaryOpByElemSx.
+        public static void Smull_Ve(ArmEmitterContext context)
+        {
+            EmitVectorWidenBinaryOpByElemSx(context, (lhs, rhs) =>
+            {
+                return context.Multiply(lhs, rhs);
+            });
+        }
+
+        // Scalar saturating absolute value: EmitAbs wrapped by
+        // EmitScalarSaturatingUnaryOpSx.
+        public static void Sqabs_S(ArmEmitterContext context)
+        {
+            EmitScalarSaturatingUnaryOpSx(context, (value) =>
+            {
+                return EmitAbs(context, value);
+            });
+        }
+
+        // Vector saturating absolute value: EmitAbs wrapped by
+        // EmitVectorSaturatingUnaryOpSx.
+        public static void Sqabs_V(ArmEmitterContext context)
+        {
+            EmitVectorSaturatingUnaryOpSx(context, (value) =>
+            {
+                return EmitAbs(context, value);
+            });
+        }
+
+        // Scalar signed saturating add: EmitScalarSaturatingBinaryOpSx with
+        // SaturatingFlags.Add.
+        public static void Sqadd_S(ArmEmitterContext context)
+            => EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Add);
+
+        // Vector signed saturating add: EmitVectorSaturatingBinaryOpSx with
+        // SaturatingFlags.Add.
+        public static void Sqadd_V(ArmEmitterContext context)
+            => EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Add);
+
+        // Scalar saturating doubling multiply, high half, without rounding
+        // (round: false), using SaturatingFlags.ScalarSx.
+        public static void Sqdmulh_S(ArmEmitterContext context)
+        {
+            EmitSaturatingBinaryOp(
+                context,
+                (lhs, rhs) =>
+                {
+                    return EmitDoublingMultiplyHighHalf(context, lhs, rhs, round: false);
+                },
+                SaturatingFlags.ScalarSx);
+        }
+
+        // Vector saturating doubling multiply, high half, without rounding
+        // (round: false), using SaturatingFlags.VectorSx.
+        public static void Sqdmulh_V(ArmEmitterContext context)
+        {
+            EmitSaturatingBinaryOp(
+                context,
+                (lhs, rhs) =>
+                {
+                    return EmitDoublingMultiplyHighHalf(context, lhs, rhs, round: false);
+                },
+                SaturatingFlags.VectorSx);
+        }
+
+        // Scalar saturating negate: IR Negate wrapped by EmitScalarSaturatingUnaryOpSx.
+        public static void Sqneg_S(ArmEmitterContext context)
+        {
+            EmitScalarSaturatingUnaryOpSx(context, (value) =>
+            {
+                return context.Negate(value);
+            });
+        }
+
+        // Vector saturating negate: IR Negate wrapped by EmitVectorSaturatingUnaryOpSx.
+        public static void Sqneg_V(ArmEmitterContext context)
+        {
+            EmitVectorSaturatingUnaryOpSx(context, (value) =>
+            {
+                return context.Negate(value);
+            });
+        }
+
+        // Scalar saturating doubling multiply, high half, with rounding (round: true),
+        // using SaturatingFlags.ScalarSx.
+        public static void Sqrdmulh_S(ArmEmitterContext context)
+        {
+            EmitSaturatingBinaryOp(
+                context,
+                (lhs, rhs) =>
+                {
+                    return EmitDoublingMultiplyHighHalf(context, lhs, rhs, round: true);
+                },
+                SaturatingFlags.ScalarSx);
+        }
+
+        // Vector saturating doubling multiply, high half, with rounding (round: true),
+        // using SaturatingFlags.VectorSx.
+        public static void Sqrdmulh_V(ArmEmitterContext context)
+        {
+            EmitSaturatingBinaryOp(
+                context,
+                (lhs, rhs) =>
+                {
+                    return EmitDoublingMultiplyHighHalf(context, lhs, rhs, round: true);
+                },
+                SaturatingFlags.VectorSx);
+        }
+
+        // Scalar signed saturating subtract: EmitScalarSaturatingBinaryOpSx with
+        // SaturatingFlags.Sub.
+        public static void Sqsub_S(ArmEmitterContext context)
+            => EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Sub);
+
+        // Vector signed saturating subtract: EmitVectorSaturatingBinaryOpSx with
+        // SaturatingFlags.Sub.
+        public static void Sqsub_V(ArmEmitterContext context)
+            => EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Sub);
+
+        // Scalar saturating narrow: EmitSaturatingNarrowOp with
+        // SaturatingNarrowFlags.ScalarSxSx.
+        public static void Sqxtn_S(ArmEmitterContext context)
+            => EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx);
+
+        // Vector saturating narrow: EmitSaturatingNarrowOp with
+        // SaturatingNarrowFlags.VectorSxSx.
+        public static void Sqxtn_V(ArmEmitterContext context)
+            => EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx);
+
+        // Scalar saturating narrow: EmitSaturatingNarrowOp with
+        // SaturatingNarrowFlags.ScalarSxZx.
+        public static void Sqxtun_S(ArmEmitterContext context)
+            => EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx);
+
+        // Vector saturating narrow: EmitSaturatingNarrowOp with
+        // SaturatingNarrowFlags.VectorSxZx.
+        public static void Sqxtun_V(ArmEmitterContext context)
+            => EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx);
+
+        // Signed rounding halving add. The SSE2 path (element sizes 0 and 1 only)
+        // subtracts a 0x80 / 0x8000 per-element bias from both inputs, averages them with
+        // Pavgb/Pavgw, and adds the bias back to the average. The fallback emits
+        // (op1 + op2 + 1) >> 1 with a signed shift on sign-extended elements.
+        public static void Srhadd_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse2 || op.Size >= 2)
+            {
+                EmitVectorBinaryOpSx(context, (lhs, rhs) =>
+                {
+                    Operand roundedSum = context.Add(context.Add(lhs, rhs), Const(1L));
+
+                    return context.ShiftRightSI(roundedSum, Const(1));
+                });
+
+                return;
+            }
+
+            bool isByteSized = op.Size == 0;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            uint biasPattern = isByteSized ? 0x80808080u : 0x80008000u;
+
+            Operand bias = X86GetAllElements(context, (int)biasPattern);
+
+            Intrinsic subInst = X86PsubInstruction[op.Size];
+
+            Operand biasedN = context.AddIntrinsic(subInst, n, bias);
+            Operand biasedM = context.AddIntrinsic(subInst, m, bias);
+
+            Intrinsic avgInst;
+
+            if (isByteSized)
+            {
+                avgInst = Intrinsic.X86Pavgb;
+            }
+            else
+            {
+                avgInst = Intrinsic.X86Pavgw;
+            }
+
+            Operand average = context.AddIntrinsic(avgInst, biasedN, biasedM);
+
+            Operand result = context.AddIntrinsic(X86PaddInstruction[op.Size], bias, average);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Signed subtract long. With UseSse41 both sources are sign-extended
+        // (X86PmovsxInstruction[size]) and subtracted with the next-wider subtract
+        // (X86PsubInstruction[size + 1]); for the 128-bit register form the sources are
+        // first shifted right by 8 bytes. Otherwise falls back to
+        // EmitVectorWidenRnRmBinaryOpSx with an IR Subtract.
+        public static void Ssubl_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorWidenRnRmBinaryOpSx(context, (lhs, rhs) =>
+                {
+                    return context.Subtract(lhs, rhs);
+                });
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            bool usesUpperHalf = op.RegisterSize == RegisterSize.Simd128;
+
+            if (usesUpperHalf)
+            {
+                n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+                m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+            }
+
+            Intrinsic signExtendInst = X86PmovsxInstruction[op.Size];
+
+            n = context.AddIntrinsic(signExtendInst, n);
+            m = context.AddIntrinsic(signExtendInst, m);
+
+            Operand difference = context.AddIntrinsic(X86PsubInstruction[op.Size + 1], n, m);
+
+            context.Copy(GetVec(op.Rd), difference);
+        }
+
+        // Signed subtract wide. With UseSse41 only Rm is sign-extended
+        // (X86PmovsxInstruction[size]) and then subtracted from Rn with the next-wider
+        // subtract (X86PsubInstruction[size + 1]); for the 128-bit register form Rm is
+        // first shifted right by 8 bytes. Otherwise falls back to
+        // EmitVectorWidenRmBinaryOpSx.
+        public static void Ssubw_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorWidenRmBinaryOpSx(context, (lhs, rhs) =>
+                {
+                    return context.Subtract(lhs, rhs);
+                });
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand wide   = GetVec(op.Rn);
+            Operand narrow = GetVec(op.Rm);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                narrow = context.AddIntrinsic(Intrinsic.X86Psrldq, narrow, Const(8));
+            }
+
+            narrow = context.AddIntrinsic(X86PmovsxInstruction[op.Size], narrow);
+
+            Operand difference = context.AddIntrinsic(X86PsubInstruction[op.Size + 1], wide, narrow);
+
+            context.Copy(GetVec(op.Rd), difference);
+        }
+
+        // Scalar integer subtract: passes an IR Subtract to EmitScalarBinaryOpZx.
+        public static void Sub_S(ArmEmitterContext context)
+        {
+            EmitScalarBinaryOpZx(context, (lhs, rhs) =>
+            {
+                return context.Subtract(lhs, rhs);
+            });
+        }
+
+        // Vector integer subtract. With UseSse2 the subtract intrinsic is picked from
+        // X86PsubInstruction by element size; otherwise an IR Subtract is emitted per
+        // element through EmitVectorBinaryOpZx.
+        public static void Sub_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpZx(context, (lhs, rhs) =>
+                {
+                    return context.Subtract(lhs, rhs);
+                });
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            Operand difference = context.AddIntrinsic(X86PsubInstruction[op.Size], n, m);
+
+            bool is64BitForm = op.RegisterSize == RegisterSize.Simd64;
+
+            if (is64BitForm)
+            {
+                difference = context.VectorZeroUpper64(difference);
+            }
+
+            context.Copy(GetVec(op.Rd), difference);
+        }
+
+        // High-narrow subtract with rounding disabled (round: false), via EmitHighNarrow.
+        public static void Subhn_V(ArmEmitterContext context)
+        {
+            EmitHighNarrow(context, (lhs, rhs) =>
+            {
+                return context.Subtract(lhs, rhs);
+            }, round: false);
+        }
+
+        // Scalar saturating accumulate: EmitScalarSaturatingBinaryOpSx with
+        // SaturatingFlags.Accumulate.
+        public static void Suqadd_S(ArmEmitterContext context)
+            => EmitScalarSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate);
+
+        // Vector saturating accumulate: EmitVectorSaturatingBinaryOpSx with
+        // SaturatingFlags.Accumulate.
+        public static void Suqadd_V(ArmEmitterContext context)
+            => EmitVectorSaturatingBinaryOpSx(context, SaturatingFlags.Accumulate);
+
+        // Absolute difference and accumulate on zero-extended elements:
+        // acc + |lhs - rhs|, via EmitVectorTernaryOpZx.
+        public static void Uaba_V(ArmEmitterContext context)
+        {
+            EmitVectorTernaryOpZx(context, (acc, lhs, rhs) =>
+            {
+                Operand difference = context.Subtract(lhs, rhs);
+                Operand magnitude  = EmitAbs(context, difference);
+
+                return context.Add(acc, magnitude);
+            });
+        }
+
+        // Widening absolute difference and accumulate on zero-extended elements:
+        // acc + |lhs - rhs|, via EmitVectorWidenRnRmTernaryOpZx.
+        public static void Uabal_V(ArmEmitterContext context)
+        {
+            EmitVectorWidenRnRmTernaryOpZx(context, (acc, lhs, rhs) =>
+            {
+                Operand difference = context.Subtract(lhs, rhs);
+                Operand magnitude  = EmitAbs(context, difference);
+
+                return context.Add(acc, magnitude);
+            });
+        }
+
+        // Unsigned absolute difference (vector). With UseSse41 the work is delegated to
+        // EmitSse41Uabd (isLong: false); otherwise |op1 - op2| is emitted per
+        // zero-extended element through EmitVectorBinaryOpZx.
+        public static void Uabd_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorBinaryOpZx(context, (lhs, rhs) =>
+                {
+                    Operand difference = context.Subtract(lhs, rhs);
+
+                    return EmitAbs(context, difference);
+                });
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            EmitSse41Uabd(context, op, n, m, isLong: false);
+        }
+
+        // Unsigned absolute difference long. The SSE4.1 path is only taken for element
+        // sizes 0 and 1: both sources are zero-extended with Pmovzxbw/Pmovzxwd and then
+        // handed to EmitSse41Uabd. For the 128-bit register form the sources are first
+        // shifted right by 8 bytes (Psrldq), so the upper half is the part that is widened.
+        public static void Uabdl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse41 || op.Size >= 2)
+            {
+                EmitVectorWidenRnRmBinaryOpZx(context, (lhs, rhs) =>
+                {
+                    Operand difference = context.Subtract(lhs, rhs);
+
+                    return EmitAbs(context, difference);
+                });
+
+                return;
+            }
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+                m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+            }
+
+            Intrinsic zeroExtendInst;
+
+            if (op.Size == 0)
+            {
+                zeroExtendInst = Intrinsic.X86Pmovzxbw;
+            }
+            else
+            {
+                zeroExtendInst = Intrinsic.X86Pmovzxwd;
+            }
+
+            n = context.AddIntrinsic(zeroExtendInst, n);
+            m = context.AddIntrinsic(zeroExtendInst, m);
+
+            EmitSse41Uabd(context, op, n, m, isLong: true);
+        }
+
+        // Unsigned add-long-pairwise with accumulation, via EmitAddLongPairwise.
+        public static void Uadalp_V(ArmEmitterContext context)
+            => EmitAddLongPairwise(context, accumulate: true, signed: false);
+
+        // Unsigned add long. With UseSse41 both sources are zero-extended
+        // (X86PmovzxInstruction[size]) and added with the next-wider add
+        // (X86PaddInstruction[size + 1]); for the 128-bit register form the sources are
+        // first shifted right by 8 bytes. Otherwise falls back to
+        // EmitVectorWidenRnRmBinaryOpZx with an IR Add.
+        public static void Uaddl_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorWidenRnRmBinaryOpZx(context, (lhs, rhs) =>
+                {
+                    return context.Add(lhs, rhs);
+                });
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            bool usesUpperHalf = op.RegisterSize == RegisterSize.Simd128;
+
+            if (usesUpperHalf)
+            {
+                n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+                m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+            }
+
+            Intrinsic zeroExtendInst = X86PmovzxInstruction[op.Size];
+
+            n = context.AddIntrinsic(zeroExtendInst, n);
+            m = context.AddIntrinsic(zeroExtendInst, m);
+
+            Operand sum = context.AddIntrinsic(X86PaddInstruction[op.Size + 1], n, m);
+
+            context.Copy(GetVec(op.Rd), sum);
+        }
+
+        // Unsigned add-long-pairwise without accumulation, via EmitAddLongPairwise.
+        public static void Uaddlp_V(ArmEmitterContext context)
+            => EmitAddLongPairwise(context, accumulate: false, signed: false);
+
+        // Unsigned add-long across vector: passes an IR Add to
+        // EmitVectorLongAcrossVectorOpZx.
+        public static void Uaddlv_V(ArmEmitterContext context)
+        {
+            EmitVectorLongAcrossVectorOpZx(context, (lhs, rhs) =>
+            {
+                return context.Add(lhs, rhs);
+            });
+        }
+
+        // Unsigned add wide. With UseSse41 only Rm is zero-extended
+        // (X86PmovzxInstruction[size]) and then added to Rn with the next-wider add
+        // (X86PaddInstruction[size + 1]); for the 128-bit register form Rm is first
+        // shifted right by 8 bytes. Otherwise falls back to EmitVectorWidenRmBinaryOpZx.
+        public static void Uaddw_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorWidenRmBinaryOpZx(context, (lhs, rhs) =>
+                {
+                    return context.Add(lhs, rhs);
+                });
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand wide   = GetVec(op.Rn);
+            Operand narrow = GetVec(op.Rm);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                narrow = context.AddIntrinsic(Intrinsic.X86Psrldq, narrow, Const(8));
+            }
+
+            narrow = context.AddIntrinsic(X86PmovzxInstruction[op.Size], narrow);
+
+            Operand sum = context.AddIntrinsic(X86PaddInstruction[op.Size + 1], wide, narrow);
+
+            context.Copy(GetVec(op.Rd), sum);
+        }
+
+        // Unsigned halving add. The SSE2 path (element size > 0 only) computes
+        // (n & m) + ((n ^ m) >> 1) using Psrlw/Psrld for the shift, which avoids forming
+        // the full-width sum. The fallback emits (op1 + op2) >> 1 with an unsigned shift
+        // on zero-extended elements.
+        public static void Uhadd_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse2 || op.Size <= 0)
+            {
+                EmitVectorBinaryOpZx(context, (lhs, rhs) =>
+                {
+                    Operand sum = context.Add(lhs, rhs);
+
+                    return context.ShiftRightUI(sum, Const(1));
+                });
+
+                return;
+            }
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            Operand commonBits = context.AddIntrinsic(Intrinsic.X86Pand, n, m);
+            Operand halfDiff   = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+            Intrinsic shiftInst;
+
+            if (op.Size == 1)
+            {
+                shiftInst = Intrinsic.X86Psrlw;
+            }
+            else
+            {
+                shiftInst = Intrinsic.X86Psrld;
+            }
+
+            halfDiff = context.AddIntrinsic(shiftInst, halfDiff, Const(1));
+
+            Operand result = context.AddIntrinsic(X86PaddInstruction[op.Size], commonBits, halfDiff);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Halving subtract on zero-extended elements. With SSE2 and op.Size below 2 the result
+        // is formed as n - avg(n, m) using the packed-average intrinsics.
+        public static void Uhsub_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse2 || op.Size >= 2)
+            {
+                EmitVectorBinaryOpZx(context, (lhs, rhs) =>
+                {
+                    Operand diff = context.Subtract(lhs, rhs);
+
+                    return context.ShiftRightUI(diff, Const(1));
+                });
+
+                return;
+            }
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+            Operand average = context.AddIntrinsic(avgInst, n, m);
+
+            Operand halved = context.AddIntrinsic(X86PsubInstruction[op.Size], n, average);
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                halved = context.VectorZeroUpper64(halved);
+            }
+
+            context.Copy(GetVec(op.Rd), halved);
+        }
+
+        // Maximum of Rn and Rm: the X86PmaxuInstruction intrinsic for op.Size under SSE4.1,
+        // otherwise a Math.Max call emitted through EmitVectorBinaryOpZx.
+        public static void Umax_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                Delegate maxFn = new _U64_U64_U64(Math.Max);
+
+                EmitVectorBinaryOpZx(context, (lhs, rhs) => context.Call(maxFn, lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            Operand larger = context.AddIntrinsic(X86PmaxuInstruction[op.Size], n, m);
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                // 64-bit form: clear the upper half of the result.
+                larger = context.VectorZeroUpper64(larger);
+            }
+
+            context.Copy(GetVec(op.Rd), larger);
+        }
+
+        // Pairwise variant: passes a Math.Max call lambda to EmitVectorPairwiseOpZx.
+        public static void Umaxp_V(ArmEmitterContext context)
+        {
+            Delegate maxFn = new _U64_U64_U64(Math.Max);
+            EmitVectorPairwiseOpZx(context, (lhs, rhs) => context.Call(maxFn, lhs, rhs));
+        }
+
+        // Across-vector variant: passes a Math.Max call lambda to EmitVectorAcrossVectorOpZx.
+        public static void Umaxv_V(ArmEmitterContext context)
+        {
+            Delegate maxFn = new _U64_U64_U64(Math.Max);
+            EmitVectorAcrossVectorOpZx(context, (lhs, rhs) => context.Call(maxFn, lhs, rhs));
+        }
+
+        // Minimum of Rn and Rm: the X86PminuInstruction intrinsic for op.Size under SSE4.1,
+        // otherwise a Math.Min call emitted through EmitVectorBinaryOpZx.
+        public static void Umin_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                Delegate minFn = new _U64_U64_U64(Math.Min);
+
+                EmitVectorBinaryOpZx(context, (lhs, rhs) => context.Call(minFn, lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            Operand smaller = context.AddIntrinsic(X86PminuInstruction[op.Size], n, m);
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                // 64-bit form: clear the upper half of the result.
+                smaller = context.VectorZeroUpper64(smaller);
+            }
+
+            context.Copy(GetVec(op.Rd), smaller);
+        }
+
+        // Pairwise variant: passes a Math.Min call lambda to EmitVectorPairwiseOpZx.
+        public static void Uminp_V(ArmEmitterContext context)
+        {
+            Delegate minFn = new _U64_U64_U64(Math.Min);
+            EmitVectorPairwiseOpZx(context, (lhs, rhs) => context.Call(minFn, lhs, rhs));
+        }
+
+        // Across-vector variant: passes a Math.Min call lambda to EmitVectorAcrossVectorOpZx.
+        public static void Uminv_V(ArmEmitterContext context)
+        {
+            Delegate minFn = new _U64_U64_U64(Math.Min);
+            EmitVectorAcrossVectorOpZx(context, (lhs, rhs) => context.Call(minFn, lhs, rhs));
+        }
+
+        // Widening multiply-accumulate: Rd += widened(Rn) * widened(Rm). The SSE4.1 path is
+        // limited to op.Size 0 and 1; other sizes go through the generic ternary emitter.
+        public static void Umlal_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse41 || op.Size >= 2)
+            {
+                EmitVectorWidenRnRmTernaryOpZx(context, (acc, lhs, rhs) =>
+                {
+                    Operand term = context.Multiply(lhs, rhs);
+
+                    return context.Add(acc, term);
+                });
+
+                return;
+            }
+
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+                m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+            }
+
+            Intrinsic widenInst = X86PmovzxInstruction[op.Size];
+            n = context.AddIntrinsic(widenInst, n);
+            m = context.AddIntrinsic(widenInst, m);
+
+            Intrinsic mulInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+            Operand product = context.AddIntrinsic(mulInst, n, m);
+
+            context.Copy(d, context.AddIntrinsic(X86PaddInstruction[op.Size + 1], d, product));
+        }
+
+        // By-element multiply-accumulate: the lambda yields acc + (lhs * rhs) for the helper.
+        public static void Umlal_Ve(ArmEmitterContext context)
+        {
+            EmitVectorWidenTernaryOpByElemZx(
+                context,
+                (acc, lhs, rhs) => context.Add(acc, context.Multiply(lhs, rhs)));
+        }
+
+        // Widening multiply-subtract: Rd -= widened(Rn) * widened(Rm). The SSE4.1 path covers
+        // op.Size 0 and 1 only; other sizes go through the generic ternary emitter.
+        public static void Umlsl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (Optimizations.UseSse41 && op.Size < 2)
+            {
+                Operand d = GetVec(op.Rd);
+                Operand n = GetVec(op.Rn);
+                Operand m = GetVec(op.Rm);
+
+                if (op.RegisterSize == RegisterSize.Simd128)
+                {
+                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+                    m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+                }
+
+                // Shared size-indexed widening-move table, as in Umlal_V, Usubl_V and Usubw_V.
+                Intrinsic movInst = X86PmovzxInstruction[op.Size];
+
+                n = context.AddIntrinsic(movInst, n);
+                m = context.AddIntrinsic(movInst, m);
+
+                // The widened elements are one size step larger, hence the word/dword multiply.
+                Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld;
+                Operand res = context.AddIntrinsic(mullInst, n, m);
+
+                Intrinsic subInst = X86PsubInstruction[op.Size + 1];
+                context.Copy(d, context.AddIntrinsic(subInst, d, res));
+            }
+            else
+            {
+                EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) =>
+                {
+                    return context.Subtract(op1, context.Multiply(op2, op3));
+                });
+            }
+        }
+
+        // By-element multiply-subtract: the lambda yields acc - (lhs * rhs) for the helper.
+        public static void Umlsl_Ve(ArmEmitterContext context)
+        {
+            EmitVectorWidenTernaryOpByElemZx(
+                context,
+                (acc, lhs, rhs) => context.Subtract(acc, context.Multiply(lhs, rhs)));
+        }
+
+        // Passes a multiplication lambda to EmitVectorWidenRnRmBinaryOpZx, which performs the
+        // emission (that helper is defined outside this section).
+        public static void Umull_V(ArmEmitterContext context) =>
+            EmitVectorWidenRnRmBinaryOpZx(context, (lhs, rhs) => context.Multiply(lhs, rhs));
+
+        // By-element form: passes a multiplication lambda to EmitVectorWidenBinaryOpByElemZx,
+        // which performs the emission (that helper is defined outside this section).
+        public static void Umull_Ve(ArmEmitterContext context) =>
+            EmitVectorWidenBinaryOpByElemZx(context, (lhs, rhs) => context.Multiply(lhs, rhs));
+
+        // Scalar form: forwards to EmitScalarSaturatingBinaryOpZx with SaturatingFlags.Add.
+        // All emission is done by that helper.
+        public static void Uqadd_S(ArmEmitterContext context) =>
+            EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+
+        // Vector form: forwards to EmitVectorSaturatingBinaryOpZx with SaturatingFlags.Add.
+        // All emission is done by that helper.
+        public static void Uqadd_V(ArmEmitterContext context) =>
+            EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add);
+
+        // Scalar form: forwards to EmitScalarSaturatingBinaryOpZx with SaturatingFlags.Sub.
+        // All emission is done by that helper.
+        public static void Uqsub_S(ArmEmitterContext context) =>
+            EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+
+        // Vector form: forwards to EmitVectorSaturatingBinaryOpZx with SaturatingFlags.Sub.
+        // All emission is done by that helper.
+        public static void Uqsub_V(ArmEmitterContext context) =>
+            EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub);
+
+        // Scalar form: forwards to EmitSaturatingNarrowOp with SaturatingNarrowFlags.ScalarZxZx.
+        // All emission is done by that helper.
+        public static void Uqxtn_S(ArmEmitterContext context) =>
+            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx);
+
+        // Vector form: forwards to EmitSaturatingNarrowOp with SaturatingNarrowFlags.VectorZxZx.
+        // All emission is done by that helper.
+        public static void Uqxtn_V(ArmEmitterContext context) =>
+            EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx);
+
+        // Rounding halving add on zero-extended elements: (a + b + 1) >> 1 in the generic path.
+        // With SSE2 and op.Size below 2 the packed-average intrinsics are used directly.
+        public static void Urhadd_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse2 || op.Size >= 2)
+            {
+                EmitVectorBinaryOpZx(context, (lhs, rhs) =>
+                {
+                    Operand sum     = context.Add(lhs, rhs);
+                    Operand rounded = context.Add(sum, Const(1L));
+
+                    return context.ShiftRightUI(rounded, Const(1));
+                });
+
+                return;
+            }
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw;
+            Operand average = context.AddIntrinsic(avgInst, n, m);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                average = context.VectorZeroUpper64(average);
+            }
+
+            context.Copy(GetVec(op.Rd), average);
+        }
+
+        // Scalar form: forwards to EmitScalarSaturatingBinaryOpZx with
+        // SaturatingFlags.Accumulate. All emission is done by that helper.
+        public static void Usqadd_S(ArmEmitterContext context) =>
+            EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+
+        // Vector form: forwards to EmitVectorSaturatingBinaryOpZx with
+        // SaturatingFlags.Accumulate. All emission is done by that helper.
+        public static void Usqadd_V(ArmEmitterContext context) =>
+            EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate);
+
+        // Widening subtract: widens both Rn and Rm (Pmovzx) and subtracts them at the next
+        // larger element size when SSE4.1 is enabled; otherwise uses the generic widen emitter.
+        public static void Usubl_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorWidenRnRmBinaryOpZx(context, (lhs, rhs) => context.Subtract(lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
+                m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8));
+            }
+
+            Intrinsic widenInst = X86PmovzxInstruction[op.Size];
+            n = context.AddIntrinsic(widenInst, n);
+            m = context.AddIntrinsic(widenInst, m);
+
+            Operand diff = context.AddIntrinsic(X86PsubInstruction[op.Size + 1], n, m);
+            context.Copy(GetVec(op.Rd), diff);
+        }
+
+        // Subtracts the widened (Pmovzx) elements of Rm from Rn using the subtract intrinsic for
+        // the next larger element size under SSE4.1; otherwise uses the generic widen emitter.
+        public static void Usubw_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorWidenRmBinaryOpZx(context, (lhs, rhs) => context.Subtract(lhs, rhs));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand wide   = GetVec(op.Rn);
+            Operand narrow = GetVec(op.Rm);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                // 128-bit form: shift Rm right by 8 bytes so its upper half gets widened.
+                narrow = context.AddIntrinsic(Intrinsic.X86Psrldq, narrow, Const(8));
+            }
+
+            narrow = context.AddIntrinsic(X86PmovzxInstruction[op.Size], narrow);
+
+            Operand dest = GetVec(op.Rd);
+            context.Copy(dest, context.AddIntrinsic(X86PsubInstruction[op.Size + 1], wide, narrow));
+        }
+
+        // Emits a conditional select that yields value when value >= 0 and its negation otherwise.
+        private static Operand EmitAbs(ArmEmitterContext context, Operand value)
+        {
+            Operand nonNegative = context.ICompareGreaterOrEqual(value, Const(value.Type, 0));
+            return context.ConditionalSelect(nonNegative, value, context.Negate(value));
+        }
+
+        // Adds adjacent element pairs of Rn into elements one size step larger; when accumulate
+        // is set the matching wide element of Rd is added as well. The result replaces Rd.
+        private static void EmitAddLongPairwise(ArmEmitterContext context, bool signed, bool accumulate)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            int srcSize = op.Size;
+            int dstSize = op.Size + 1;
+
+            Operand result = context.VectorZero();
+
+            int pairCount = op.GetPairsCount() >> srcSize;
+
+            for (int pair = 0; pair < pairCount; pair++)
+            {
+                Operand lo = EmitVectorExtract(context, op.Rn, pair * 2,     srcSize, signed);
+                Operand hi = EmitVectorExtract(context, op.Rn, pair * 2 + 1, srcSize, signed);
+
+                Operand sum = context.Add(lo, hi);
+                if (accumulate)
+                {
+                    sum = context.Add(sum, EmitVectorExtract(context, op.Rd, pair, dstSize, signed));
+                }
+
+                result = EmitVectorInsert(context, result, sum, pair, dstSize);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Emits the high half of the doubled product 2 * n * m for the current element size,
+        // optionally rounded. Presumably n and m are sign-extended 64-bit elements — TODO confirm.
+        private static Operand EmitDoublingMultiplyHighHalf(
+            ArmEmitterContext context,
+            Operand n,
+            Operand m,
+            bool round)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+            int eSize = 8 << op.Size; // element width in bits
+
+            Operand res = context.Multiply(n, m);
+
+            if (!round)
+            {
+                // (n * m) >> (eSize - 1) equals (2 * n * m) >> eSize without the explicit doubling.
+                res = context.ShiftRightSI(res, Const(eSize - 1));
+            }
+            else
+            {
+                long roundConst = 1L << (eSize - 1);
+                res = context.ShiftLeft(res, Const(1));
+                res = context.Add(res, Const(roundConst));
+                res = context.ShiftRightSI(res, Const(eSize));
+
+                // NOTE(review): a result equal to int.MinValue is negated; presumably this undoes
+                // the 64-bit wrap-around of the doubled product for 32-bit elements — confirm.
+                Operand isIntMin = context.ICompareEqual(res, Const((long)int.MinValue));
+                res = context.ConditionalSelect(isIntMin, context.Negate(res), res);
+            }
+
+            return res;
+        }
+
+        // Applies emit to each pair of double-width elements of Rn/Rm and inserts the upper half
+        // of each result (optionally rounded) into Rd, at an offset for 128-bit ops.
+        private static void EmitHighNarrow(ArmEmitterContext context, Func2I emit, bool round)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            int count = 8 >> op.Size;
+            int bits  = 8 << op.Size;
+
+            int first = op.RegisterSize == RegisterSize.Simd128 ? count : 0;
+            // A non-zero start index means Rd's current contents are the starting value.
+            Operand result = first != 0 ? context.Copy(GetVec(op.Rd)) : context.VectorZero();
+
+            long half = 1L << (bits - 1);
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand ne = EmitVectorExtractZx(context, op.Rn, i, op.Size + 1);
+                Operand me = EmitVectorExtractZx(context, op.Rm, i, op.Size + 1);
+
+                Operand wide = emit(ne, me);
+                if (round)
+                {
+                    wide = context.Add(wide, Const(half));
+                }
+
+                Operand narrow = context.ShiftRightUI(wide, Const(bits));
+                result = EmitVectorInsert(context, result, narrow, first + i, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Rounds Rn with the scalar x86 round intrinsic chosen by bit 0 of op.Size, using the
+        // round-control value derived from roundMode, then zeroes the upper part and stores to Rd.
+        public static void EmitScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            bool isDouble = (op.Size & 1) != 0;
+
+            Operand n = GetVec(op.Rn);
+
+            Intrinsic roundInst = isDouble ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss;
+
+            Operand rounded = context.AddIntrinsic(roundInst, n, Const(X86GetRoundControl(roundMode)));
+
+            // Roundsd results get the upper 64 bits zeroed, Roundss results the upper 96 bits.
+            rounded = isDouble
+                ? context.VectorZeroUpper64(rounded)
+                : context.VectorZeroUpper96(rounded);
+
+            context.Copy(GetVec(op.Rd), rounded);
+        }
+
+        // Packed counterpart: Roundps/Roundpd chosen by bit 0 of op.Size, result stored to Rd.
+        public static void EmitVectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            Intrinsic roundInst = (op.Size & 1) == 0 ? Intrinsic.X86Roundps : Intrinsic.X86Roundpd;
+            Operand rounded = context.AddIntrinsic(roundInst, n, Const(X86GetRoundControl(roundMode)));
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                rounded = context.VectorZeroUpper64(rounded);
+            }
+
+            context.Copy(GetVec(op.Rd), rounded);
+        }
+
+        private enum AddSub // Follow-up applied to the product by EmitSse41Mul_AddSub.
+        {
+            None,     // store the product unchanged
+            Add,      // store destination + product
+            Subtract  // store destination - product
+        }
+
+        // Emits an SSE-based multiply of Rn by Rm for the current element size and, depending
+        // on addSub, adds the product to or subtracts it from Rd before writing Rd back.
+        //   context: emitter whose current op is read as an OpCodeSimdReg.
+        //   addSub:  None stores the plain product; Add/Subtract combine it with Rd.
+        private static void EmitSse41Mul_AddSub(ArmEmitterContext context, AddSub addSub)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            Operand res = null;
+
+            if (op.Size == 0)
+            {
+                // Byte elements are multiplied with word multiplies: the odd bytes are shifted
+                // down, multiplied, and shifted back up into the high byte of each word.
+                Operand ns8 = context.AddIntrinsic(Intrinsic.X86Psrlw, n, Const(8));
+                Operand ms8 = context.AddIntrinsic(Intrinsic.X86Psrlw, m, Const(8));
+
+                res = context.AddIntrinsic(Intrinsic.X86Pmullw, ns8, ms8);
+                res = context.AddIntrinsic(Intrinsic.X86Psllw, res, Const(8));
+
+                // Even bytes: the low byte of each full word product is already the result.
+                Operand res2 = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m);
+
+                // Blend the two: the mask marks the low byte of every word (assuming
+                // X86GetAllElements replicates the constant across the vector).
+                Operand mask = X86GetAllElements(context, 0x00FF00FF);
+
+                res = context.AddIntrinsic(Intrinsic.X86Pblendvb, res, res2, mask);
+            }
+            else if (op.Size == 1)
+            {
+                res = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m);
+            }
+            else
+            {
+                res = context.AddIntrinsic(Intrinsic.X86Pmulld, n, m);
+            }
+
+            Operand d = GetVec(op.Rd);
+
+            // The accumulate instruction comes from the size-indexed tables the other emitters
+            // in this file already use, rather than a per-size switch.
+            if (addSub == AddSub.Add)
+            {
+                Intrinsic addInst = X86PaddInstruction[op.Size];
+                res = context.AddIntrinsic(addInst, d, res);
+            }
+            else if (addSub == AddSub.Subtract)
+            {
+                Intrinsic subInst = X86PsubInstruction[op.Size];
+                res = context.AddIntrinsic(subInst, d, res);
+            }
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                res = context.VectorZeroUpper64(res);
+            }
+
+            context.Copy(d, res);
+        }
+
+        // Absolute difference of n and m built from a greater-than mask: n - m where n > m,
+        // m - n elsewhere. isLong selects the next larger element size; the result goes to Rd.
+        private static void EmitSse41Sabd(
+            ArmEmitterContext context,
+            OpCodeSimdReg op,
+            Operand n,
+            Operand m,
+            bool isLong)
+        {
+            int size = op.Size + (isLong ? 1 : 0);
+
+            // Mask produced by the packed greater-than compare of n against m.
+            Operand nGreater = context.AddIntrinsic(X86PcmpgtInstruction[size], n, m);
+
+            Intrinsic subInst = X86PsubInstruction[size];
+
+            Operand whenGreater = context.AddIntrinsic(subInst, n, m);
+            whenGreater = context.AddIntrinsic(Intrinsic.X86Pand, nGreater, whenGreater);
+
+            Operand otherwise = context.AddIntrinsic(subInst, m, n);
+            otherwise = context.AddIntrinsic(Intrinsic.X86Pandn, nGreater, otherwise);
+
+            Operand result = context.AddIntrinsic(Intrinsic.X86Por, whenGreater, otherwise);
+
+            // Only the non-long 64-bit form has its upper half cleared.
+            if (!isLong && op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Absolute difference of n and m for unsigned elements. The "n > m" mask is derived as
+        // NOT(max(m, n) == m); n - m is kept where it is set and m - n elsewhere. isLong
+        // selects the next larger element size; the result goes to Rd.
+        private static void EmitSse41Uabd(
+            ArmEmitterContext context,
+            OpCodeSimdReg op,
+            Operand n,
+            Operand m,
+            bool isLong)
+        {
+            int size = op.Size + (isLong ? 1 : 0);
+
+            Operand larger = context.AddIntrinsic(X86PmaxuInstruction[size], m, n);
+
+            Operand mIsMax = context.AddIntrinsic(X86PcmpeqInstruction[size], larger, m);
+
+            // And-not against the X86GetAllElements(-1) operand inverts the equality mask.
+            Operand allOnes  = X86GetAllElements(context, -1L);
+            Operand nGreater = context.AddIntrinsic(Intrinsic.X86Pandn, mIsMax, allOnes);
+
+            Intrinsic subInst = X86PsubInstruction[size];
+
+            Operand nMinusM = context.AddIntrinsic(subInst, n, m);
+            Operand mMinusN = context.AddIntrinsic(subInst, m, n);
+
+            nMinusM = context.AddIntrinsic(Intrinsic.X86Pand,  nGreater, nMinusM);
+            mMinusN = context.AddIntrinsic(Intrinsic.X86Pandn, nGreater, mMinusN);
+
+            Operand result = context.AddIntrinsic(Intrinsic.X86Por, nMinusM, mMinusN);
+
+            // Only the non-long 64-bit form has its upper half cleared.
+            if (!isLong && op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/ARMeilleure/Instructions/InstEmitSimdCmp.cs
new file mode 100644
index 000000000..f27121bb3
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdCmp.cs
@@ -0,0 +1,712 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    using Func2I = Func<Operand, Operand, Operand>;
+
+    static partial class InstEmit
+    {
+        // Scalar form: hands an ICompareEqual lambda to EmitCmpOp with scalar: true.
+        // EmitCmpOp is defined outside this section.
+        public static void Cmeq_S(ArmEmitterContext context) =>
+            EmitCmpOp(context, (lhs, rhs) => context.ICompareEqual(lhs, rhs), scalar: true);
+
+        // Vector compare-equal of Rn against Rm, or against zero when the op is not decoded as
+        // OpCodeSimdReg. Uses the packed compare-equal intrinsic for op.Size when SSE4.1 is
+        // enabled and the generic EmitCmpOp path otherwise.
+        public static void Cmeq_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                // Generic path: EmitCmpOp with scalar: false.
+                EmitCmpOp(context, (lhs, rhs) => context.ICompareEqual(lhs, rhs), scalar: false);
+
+                return;
+            }
+
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            // Second operand: Rm for the register form, a zero vector otherwise.
+            Operand m = op is OpCodeSimdReg regOp
+                ? GetVec(regOp.Rm)
+                : context.VectorZero();
+
+            Intrinsic eqInst = X86PcmpeqInstruction[op.Size];
+
+            Operand equal = context.AddIntrinsic(eqInst, n, m);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                // 64-bit form: clear the upper half of the mask.
+                equal = context.VectorZeroUpper64(equal);
+            }
+
+            context.Copy(GetVec(op.Rd), equal);
+        }
+
+        // Scalar form: hands an ICompareGreaterOrEqual lambda to EmitCmpOp with scalar: true.
+        // EmitCmpOp is defined outside this section.
+        public static void Cmge_S(ArmEmitterContext context) =>
+            EmitCmpOp(context, (lhs, rhs) => context.ICompareGreaterOrEqual(lhs, rhs), scalar: true);
+
+        // Vector greater-or-equal of Rn against Rm, or against zero when the op is not decoded
+        // as OpCodeSimdReg. With SSE4.2 enabled the mask is built as NOT(m > n); otherwise the
+        // generic EmitCmpOp path is used.
+        public static void Cmge_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse42)
+            {
+                // Generic path: EmitCmpOp with scalar: false.
+                EmitCmpOp(context, (lhs, rhs) => context.ICompareGreaterOrEqual(lhs, rhs), scalar: false);
+
+                return;
+            }
+
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            // Second operand: Rm for the register form, a zero vector otherwise.
+            Operand m = op is OpCodeSimdReg regOp
+                ? GetVec(regOp.Rm)
+                : context.VectorZero();
+
+            // Compare with swapped operands (m > n), then invert below.
+            Intrinsic gtInst = X86PcmpgtInstruction[op.Size];
+            Operand less = context.AddIntrinsic(gtInst, m, n);
+
+            Operand allOnes = X86GetAllElements(context, -1L);
+
+            Operand notLess = context.AddIntrinsic(Intrinsic.X86Pandn, less, allOnes);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                // 64-bit form: clear the upper half of the mask.
+                notLess = context.VectorZeroUpper64(notLess);
+            }
+
+            context.Copy(GetVec(op.Rd), notLess);
+        }
+
+        // Scalar form: hands an ICompareGreater lambda to EmitCmpOp with scalar: true.
+        // EmitCmpOp is defined outside this section.
+        public static void Cmgt_S(ArmEmitterContext context) =>
+            EmitCmpOp(context, (lhs, rhs) => context.ICompareGreater(lhs, rhs), scalar: true);
+
+        // Vector greater-than of Rn against Rm, or against zero when the op is not decoded as
+        // OpCodeSimdReg. Uses the packed greater-than intrinsic for op.Size when SSE4.2 is
+        // enabled and the generic EmitCmpOp path otherwise.
+        public static void Cmgt_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse42)
+            {
+                // Generic path: EmitCmpOp with scalar: false.
+                EmitCmpOp(context, (lhs, rhs) => context.ICompareGreater(lhs, rhs), scalar: false);
+
+                return;
+            }
+
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            // Second operand: Rm for the register form, a zero vector otherwise.
+            Operand m = op is OpCodeSimdReg regOp
+                ? GetVec(regOp.Rm)
+                : context.VectorZero();
+
+            Intrinsic gtInst = X86PcmpgtInstruction[op.Size];
+
+            Operand greater = context.AddIntrinsic(gtInst, n, m);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                // 64-bit form: clear the upper half of the mask.
+                greater = context.VectorZeroUpper64(greater);
+            }
+
+            context.Copy(GetVec(op.Rd), greater);
+        }
+
+        // Scalar form: hands an ICompareGreaterUI lambda to EmitCmpOp with scalar: true.
+        // EmitCmpOp is defined outside this section.
+        public static void Cmhi_S(ArmEmitterContext context) =>
+            EmitCmpOp(context, (lhs, rhs) => context.ICompareGreaterUI(lhs, rhs), scalar: true);
+
+        // Vector unsigned greater-than of Rn against Rm. With SSE4.1 and op.Size below 3 the
+        // mask is derived as NOT(max(m, n) == m); otherwise the generic EmitCmpOp path is used.
+        public static void Cmhi_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse41 || op.Size >= 3)
+            {
+                // Generic path: EmitCmpOp with scalar: false.
+                EmitCmpOp(context, (lhs, rhs) => context.ICompareGreaterUI(lhs, rhs), scalar: false);
+
+                return;
+            }
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            // m equals max(m, n) exactly where m >= n; the inverse of that is n > m.
+            Operand larger = context.AddIntrinsic(X86PmaxuInstruction[op.Size], m, n);
+
+            Operand mIsMax = context.AddIntrinsic(X86PcmpeqInstruction[op.Size], larger, m);
+
+            Operand allOnes = X86GetAllElements(context, -1L);
+
+            Operand higher = context.AddIntrinsic(Intrinsic.X86Pandn, mIsMax, allOnes);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                higher = context.VectorZeroUpper64(higher);
+            }
+
+            context.Copy(GetVec(op.Rd), higher);
+        }
+
+        // Scalar form: hands an ICompareGreaterOrEqualUI lambda to EmitCmpOp with scalar: true.
+        // EmitCmpOp is defined outside this section.
+        public static void Cmhs_S(ArmEmitterContext context) =>
+            EmitCmpOp(context, (lhs, rhs) => context.ICompareGreaterOrEqualUI(lhs, rhs), scalar: true);
+
+        // Vector unsigned greater-or-equal of Rn against Rm. With SSE4.1 and op.Size below 3
+        // the mask is max(n, m) == n; otherwise the generic EmitCmpOp path is used.
+        public static void Cmhs_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse41 || op.Size >= 3)
+            {
+                // Generic path: EmitCmpOp with scalar: false.
+                EmitCmpOp(context, (lhs, rhs) => context.ICompareGreaterOrEqualUI(lhs, rhs), scalar: false);
+
+                return;
+            }
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            // n equals max(n, m) exactly where n >= m.
+            Operand larger = context.AddIntrinsic(X86PmaxuInstruction[op.Size], n, m);
+
+            Operand higherOrSame = context.AddIntrinsic(X86PcmpeqInstruction[op.Size], larger, n);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                higherOrSame = context.VectorZeroUpper64(higherOrSame);
+            }
+
+            context.Copy(GetVec(op.Rd), higherOrSame);
+        }
+
+        // Scalar form: hands an ICompareLessOrEqual lambda to EmitCmpOp with scalar: true.
+        // EmitCmpOp is defined outside this section.
+        public static void Cmle_S(ArmEmitterContext context) =>
+            EmitCmpOp(context, (lhs, rhs) => context.ICompareLessOrEqual(lhs, rhs), scalar: true);
+
+        // Vector less-or-equal of Rn against zero. With SSE4.2 enabled the mask is built as
+        // NOT(n > 0); otherwise the generic EmitCmpOp path is used.
+        public static void Cmle_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse42)
+            {
+                EmitCmpOp(context, (lhs, rhs) => context.ICompareLessOrEqual(lhs, rhs), scalar: false);
+
+                return;
+            }
+
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            // Packed greater-than against a zero vector, inverted by the and-not below.
+            Intrinsic gtInst = X86PcmpgtInstruction[op.Size];
+            Operand positive = context.AddIntrinsic(gtInst, n, context.VectorZero());
+            Operand allOnes = X86GetAllElements(context, -1L);
+            Operand notPositive = context.AddIntrinsic(Intrinsic.X86Pandn, positive, allOnes);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                notPositive = context.VectorZeroUpper64(notPositive);
+            }
+
+            context.Copy(GetVec(op.Rd), notPositive);
+        }
+
+        // Scalar form: hands an ICompareLess lambda to EmitCmpOp with scalar: true.
+        // EmitCmpOp is defined outside this section.
+        public static void Cmlt_S(ArmEmitterContext context) =>
+            EmitCmpOp(context, (lhs, rhs) => context.ICompareLess(lhs, rhs), scalar: true);
+
+        // Vector less-than of Rn against zero; SSE4.2 path or the generic EmitCmpOp path.
+        public static void Cmlt_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse42)
+            {
+                EmitCmpOp(context, (lhs, rhs) => context.ICompareLess(lhs, rhs), scalar: false);
+
+                return;
+            }
+
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            // n < 0 is expressed as 0 > n using the packed greater-than intrinsic.
+            Operand negative = context.AddIntrinsic(X86PcmpgtInstruction[op.Size], context.VectorZero(), n);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                negative = context.VectorZeroUpper64(negative);
+            }
+
+            context.Copy(GetVec(op.Rd), negative);
+        }
+
+        // Scalar form: forwards to EmitCmtstOp with scalar: true.
+        // EmitCmtstOp is defined outside this section.
+        public static void Cmtst_S(ArmEmitterContext context) =>
+            EmitCmtstOp(context, scalar: true);
+
+        // Vector form: forwards to EmitCmtstOp with scalar: false.
+        // EmitCmtstOp is defined outside this section.
+        public static void Cmtst_V(ArmEmitterContext context) =>
+            EmitCmtstOp(context, scalar: false);
+
+        // Forwards to EmitFccmpOrFccmpe with signalNaNs: false.
+        // That helper is defined outside this section.
+        public static void Fccmp_S(ArmEmitterContext context) =>
+            EmitFccmpOrFccmpe(context, signalNaNs: false);
+
+        // Forwards to EmitFccmpOrFccmpe with signalNaNs: true.
+        // That helper is defined outside this section.
+        public static void Fccmpe_S(ArmEmitterContext context) =>
+            EmitFccmpOrFccmpe(context, signalNaNs: true);
+
+        // Scalar FP equal: SSE compare helper only when FastFP and SSE2 are both enabled.
+        public static void Fcmeq_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.FastFP || !Optimizations.UseSse2)
+            {
+                EmitCmpOpF(context, SoftFloat32.FPCompareEQ, SoftFloat64.FPCompareEQ, scalar: true);
+                return;
+            }
+
+            EmitCmpSseOrSse2OpF(context, CmpCondition.Equal, scalar: true);
+        }
+
+        // Vector FP "equal" compare. Uses the SSE compare helper when both FastFP and
+        // UseSse2 are set; otherwise calls the SoftFloat FPCompareEQ routines per element.
+        public static void Fcmeq_V(ArmEmitterContext context)
+        {
+            bool useSse = Optimizations.FastFP && Optimizations.UseSse2;
+
+            if (!useSse)
+            {
+                EmitCmpOpF(context, SoftFloat32.FPCompareEQ, SoftFloat64.FPCompareEQ, scalar: false);
+
+                return;
+            }
+
+            EmitCmpSseOrSse2OpF(context, CmpCondition.Equal, scalar: false);
+        }
+
+        // Scalar FP "greater than or equal" compare. SSE helper when FastFP and UseSse2
+        // are both set; SoftFloat FPCompareGE otherwise.
+        public static void Fcmge_S(ArmEmitterContext context)
+        {
+            bool useSse = Optimizations.FastFP && Optimizations.UseSse2;
+
+            if (!useSse)
+            {
+                EmitCmpOpF(context, SoftFloat32.FPCompareGE, SoftFloat64.FPCompareGE, scalar: true);
+
+                return;
+            }
+
+            EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true);
+        }
+
+        // Vector FP "greater than or equal" compare. SSE helper when FastFP and UseSse2
+        // are both set; SoftFloat FPCompareGE per element otherwise.
+        public static void Fcmge_V(ArmEmitterContext context)
+        {
+            bool useSse = Optimizations.FastFP && Optimizations.UseSse2;
+
+            if (!useSse)
+            {
+                EmitCmpOpF(context, SoftFloat32.FPCompareGE, SoftFloat64.FPCompareGE, scalar: false);
+
+                return;
+            }
+
+            EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false);
+        }
+
+        // Scalar FP "greater than" compare. SSE helper when FastFP and UseSse2 are both
+        // set; SoftFloat FPCompareGT otherwise.
+        public static void Fcmgt_S(ArmEmitterContext context)
+        {
+            bool useSse = Optimizations.FastFP && Optimizations.UseSse2;
+
+            if (!useSse)
+            {
+                EmitCmpOpF(context, SoftFloat32.FPCompareGT, SoftFloat64.FPCompareGT, scalar: true);
+
+                return;
+            }
+
+            EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true);
+        }
+
+        // Vector FP "greater than" compare. SSE helper when FastFP and UseSse2 are both
+        // set; SoftFloat FPCompareGT per element otherwise.
+        public static void Fcmgt_V(ArmEmitterContext context)
+        {
+            bool useSse = Optimizations.FastFP && Optimizations.UseSse2;
+
+            if (!useSse)
+            {
+                EmitCmpOpF(context, SoftFloat32.FPCompareGT, SoftFloat64.FPCompareGT, scalar: false);
+
+                return;
+            }
+
+            EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false);
+        }
+
+        // Scalar FP "less than or equal" compare. On the SSE path it requests
+        // GreaterThanOrEqual with isLeOrLt: true; otherwise it uses SoftFloat FPCompareLE.
+        public static void Fcmle_S(ArmEmitterContext context)
+        {
+            bool useSse = Optimizations.FastFP && Optimizations.UseSse2;
+
+            if (!useSse)
+            {
+                EmitCmpOpF(context, SoftFloat32.FPCompareLE, SoftFloat64.FPCompareLE, scalar: true);
+
+                return;
+            }
+
+            EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, isLeOrLt: true);
+        }
+
+        // Vector FP "less than or equal" compare. On the SSE path it requests
+        // GreaterThanOrEqual with isLeOrLt: true; otherwise it uses SoftFloat FPCompareLE.
+        public static void Fcmle_V(ArmEmitterContext context)
+        {
+            bool useSse = Optimizations.FastFP && Optimizations.UseSse2;
+
+            if (!useSse)
+            {
+                EmitCmpOpF(context, SoftFloat32.FPCompareLE, SoftFloat64.FPCompareLE, scalar: false);
+
+                return;
+            }
+
+            EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, isLeOrLt: true);
+        }
+
+        // Scalar FP "less than" compare. On the SSE path it requests GreaterThan with
+        // isLeOrLt: true; otherwise it uses SoftFloat FPCompareLT.
+        public static void Fcmlt_S(ArmEmitterContext context)
+        {
+            bool useSse = Optimizations.FastFP && Optimizations.UseSse2;
+
+            if (!useSse)
+            {
+                EmitCmpOpF(context, SoftFloat32.FPCompareLT, SoftFloat64.FPCompareLT, scalar: true);
+
+                return;
+            }
+
+            EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true, isLeOrLt: true);
+        }
+
+        // Vector FP "less than" compare. On the SSE path it requests GreaterThan with
+        // isLeOrLt: true; otherwise it uses SoftFloat FPCompareLT.
+        public static void Fcmlt_V(ArmEmitterContext context)
+        {
+            bool useSse = Optimizations.FastFP && Optimizations.UseSse2;
+
+            if (!useSse)
+            {
+                EmitCmpOpF(context, SoftFloat32.FPCompareLT, SoftFloat64.FPCompareLT, scalar: false);
+
+                return;
+            }
+
+            EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false, isLeOrLt: true);
+        }
+
+        // Forwards to EmitFcmpOrFcmpe with signalNaNs: false.
+        public static void Fcmp_S(ArmEmitterContext context)
+            => EmitFcmpOrFcmpe(context, signalNaNs: false);
+
+        // Forwards to EmitFcmpOrFcmpe with signalNaNs: true.
+        public static void Fcmpe_S(ArmEmitterContext context)
+            => EmitFcmpOrFcmpe(context, signalNaNs: true);
+
+        // Conditional FP compare. If the opcode's condition holds, the regular compare
+        // (EmitFcmpOrFcmpe) is emitted; otherwise the flags are loaded from the opcode's
+        // immediate Nzcv value.
+        public static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs)
+        {
+            OpCodeSimdFcond condOp = (OpCodeSimdFcond)context.CurrOp;
+
+            Operand lblCompare = Label();
+            Operand lblDone    = Label();
+
+            Operand condHolds = InstEmitFlowHelper.GetCondTrue(context, condOp.Cond);
+
+            context.BranchIfTrue(lblCompare, condHolds);
+
+            // Condition false: flags come straight from the immediate.
+            EmitSetNzcv(context, Const(condOp.Nzcv));
+
+            context.Branch(lblDone);
+
+            // Condition true: perform the actual comparison.
+            context.MarkLabel(lblCompare);
+
+            EmitFcmpOrFcmpe(context, signalNaNs);
+
+            context.MarkLabel(lblDone);
+        }
+
+        // Emits a scalar floating-point compare of Rn against Rm (or against zero) and
+        // writes the outcome to the N, Z, C and V flags.
+        //
+        // context:    emitter context; context.CurrOp must be an OpCodeSimdReg.
+        // signalNaNs: forwarded to the SoftFloat FPCompare call on the fallback path.
+        //             It is not read on the SSE path.
+        private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            // Predicate immediate handed to Cmpss/Cmpsd below; the result is treated as an
+            // "is ordered" mask. NOTE(review): 7 is ORD in the x86 CMPSS/CMPSD encoding — confirm.
+            const int cmpOrdered = 7;
+
+            // Bit3 selects the compare-with-zero form, except for OpCodeSimdFcond opcodes,
+            // which always compare against Rm.
+            bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false;
+
+            if (Optimizations.FastFP && Optimizations.UseSse2)
+            {
+                Operand n = GetVec(op.Rn);
+                Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm);
+
+                Operand lblNaN = Label();
+                Operand lblEnd = Label();
+
+                if (op.Size == 0)
+                {
+                    Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const(cmpOrdered));
+
+                    // Only the low 16 bits of the mask are inspected to decide the branch.
+                    Operand isOrdered = context.VectorExtract16(ordMask, 0);
+
+                    context.BranchIfFalse(lblNaN, isOrdered);
+
+                    // Ordered operands: C = (n >= m), Z = (n == m), N = (n < m), V = 0.
+                    Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m);
+                    Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m);
+                    Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m);
+
+                    SetFlag(context, PState.VFlag, Const(0));
+                    SetFlag(context, PState.CFlag, cf);
+                    SetFlag(context, PState.ZFlag, zf);
+                    SetFlag(context, PState.NFlag, nf);
+                }
+                else /* if (op.Size == 1) */
+                {
+                    Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const(cmpOrdered));
+
+                    // Only the low 16 bits of the mask are inspected to decide the branch.
+                    Operand isOrdered = context.VectorExtract16(ordMask, 0);
+
+                    context.BranchIfFalse(lblNaN, isOrdered);
+
+                    // Ordered operands: C = (n >= m), Z = (n == m), N = (n < m), V = 0.
+                    Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m);
+                    Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m);
+                    Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m);
+
+                    SetFlag(context, PState.VFlag, Const(0));
+                    SetFlag(context, PState.CFlag, cf);
+                    SetFlag(context, PState.ZFlag, zf);
+                    SetFlag(context, PState.NFlag, nf);
+                }
+
+                context.Branch(lblEnd);
+
+                context.MarkLabel(lblNaN);
+
+                // Unordered result: V = 1, C = 1, Z = 0, N = 0.
+                SetFlag(context, PState.VFlag, Const(1));
+                SetFlag(context, PState.CFlag, Const(1));
+                SetFlag(context, PState.ZFlag, Const(0));
+                SetFlag(context, PState.NFlag, Const(0));
+
+                context.MarkLabel(lblEnd);
+            }
+            else
+            {
+                // Fallback: call the SoftFloat FPCompare routine and unpack its return
+                // value into the flags via EmitSetNzcv.
+                OperandType type = op.Size != 0 ? OperandType.FP64 : OperandType.FP32;
+
+                Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
+                Operand me;
+
+                if (cmpWithZero)
+                {
+                    me = op.Size == 0 ? ConstF(0f) : ConstF(0d);
+                }
+                else
+                {
+                    me = context.VectorExtract(type, GetVec(op.Rm), 0);
+                }
+
+                Delegate dlg = op.Size != 0
+                    ? (Delegate)new _S32_F64_F64_Bool(SoftFloat64.FPCompare)
+                    : (Delegate)new _S32_F32_F32_Bool(SoftFloat32.FPCompare);
+
+                Operand nzcv = context.Call(dlg, ne, me, Const(signalNaNs));
+
+                EmitSetNzcv(context, nzcv);
+            }
+        }
+
+        // Unpacks a 4-bit NZCV value into the individual flags:
+        // bit 0 -> V, bit 1 -> C, bit 2 -> Z, bit 3 -> N.
+        private static void EmitSetNzcv(ArmEmitterContext context, Operand nzcv)
+        {
+            // Returns (nzcv >> position) & 1; the shift is skipped for position 0.
+            Operand BitAt(int position)
+            {
+                Operand shifted = position == 0
+                    ? nzcv
+                    : context.ShiftRightUI(nzcv, Const(position));
+
+                return context.BitwiseAnd(shifted, Const(1));
+            }
+
+            SetFlag(context, PState.VFlag, BitAt(0));
+            SetFlag(context, PState.CFlag, BitAt(1));
+            SetFlag(context, PState.ZFlag, BitAt(2));
+            SetFlag(context, PState.NFlag, BitAt(3));
+        }
+
+        // Per-element integer compare fallback. Each sign-extended element of Rn is
+        // compared (via emitCmp) with the matching element of Rm when the opcode is an
+        // OpCodeSimdReg, or with zero otherwise. A true result writes an all-ones element
+        // into Rd, a false result writes zero. With scalar set only element 0 is processed.
+        private static void EmitCmpOp(ArmEmitterContext context, Func2I emitCmp, bool scalar)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int elemCount = scalar ? 1 : op.GetBytesCount() >> op.Size;
+
+            // Element width in bits: 8, 16, 32 or 64.
+            int elemBits = 8 << op.Size;
+
+            ulong allOnes = ulong.MaxValue >> (64 - elemBits);
+
+            for (int i = 0; i < elemCount; i++)
+            {
+                Operand lhs = EmitVectorExtractSx(context, op.Rn, i, op.Size);
+
+                Operand rhs = op is OpCodeSimdReg regOp
+                    ? EmitVectorExtractSx(context, regOp.Rm, i, op.Size)
+                    : Const(0L);
+
+                Operand laneMask = context.ConditionalSelect(emitCmp(lhs, rhs), Const(allOnes), Const(0L));
+
+                result = EmitVectorInsert(context, result, laneMask, i, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Per-element bit test. For every element, Rn AND Rm is computed; a non-zero
+        // result writes an all-ones element into Rd, otherwise zero. With scalar set
+        // only element 0 is processed.
+        private static void EmitCmtstOp(ArmEmitterContext context, bool scalar)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int elemCount = scalar ? 1 : op.GetBytesCount() >> op.Size;
+
+            // Element width in bits: 8, 16, 32 or 64.
+            int elemBits = 8 << op.Size;
+
+            ulong allOnes = ulong.MaxValue >> (64 - elemBits);
+
+            for (int i = 0; i < elemCount; i++)
+            {
+                Operand lhs = EmitVectorExtractZx(context, op.Rn, i, op.Size);
+                Operand rhs = EmitVectorExtractZx(context, op.Rm, i, op.Size);
+
+                Operand anyCommonBit = context.ICompareNotEqual(context.BitwiseAnd(lhs, rhs), Const(0L));
+
+                Operand laneMask = context.ConditionalSelect(anyCommonBit, Const(allOnes), Const(0L));
+
+                result = EmitVectorInsert(context, result, laneMask, i, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Per-element floating-point compare fallback. Each element of Rn is passed,
+        // together with the matching element of Rm (or a zero constant when the opcode
+        // is not an OpCodeSimdReg), to the f32/f64 soft-float routine, and the returned
+        // value is inserted into Rd. With scalar set only element 0 is processed.
+        private static void EmitCmpOpF(
+            ArmEmitterContext context,
+            _F32_F32_F32 f32,
+            _F64_F64_F64 f64,
+            bool scalar)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int sizeF = op.Size & 1;
+
+            bool isDouble = sizeF != 0;
+
+            OperandType elemType = isDouble ? OperandType.FP64 : OperandType.FP32;
+
+            // sizeF + 2 is log2 of the element size in bytes (4 or 8).
+            int elemCount = scalar ? 1 : op.GetBytesCount() >> (sizeF + 2);
+
+            for (int i = 0; i < elemCount; i++)
+            {
+                Operand lhs = context.VectorExtract(elemType, GetVec(op.Rn), i);
+
+                Operand rhs = op is OpCodeSimdReg regOp
+                    ? context.VectorExtract(elemType, GetVec(regOp.Rm), i)
+                    : (isDouble ? ConstF(0d) : ConstF(0f));
+
+                Operand compared = EmitSoftFloatCall(context, f32, f64, lhs, rhs);
+
+                result = context.VectorInsert(result, compared, i);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Comparison kinds accepted by EmitCmpSseOrSse2OpF.
+        // NOTE(review): the numeric values appear to follow the x86 CMPPS/CMPPD predicate
+        // immediate encoding, where 0 = EQ, 5 = NLT and 6 = NLE. NLT/NLE also evaluate to
+        // true for unordered (NaN) operands — confirm against the Intel SDM before relying
+        // on these values for NaN inputs.
+        private enum CmpCondition
+        {
+            Equal              = 0,
+            GreaterThanOrEqual = 5,
+            GreaterThan        = 6
+        }
+
+        // Emits a floating-point compare using the SSE/SSE2 CMPSS/CMPPS/CMPSD/CMPPD
+        // intrinsics and writes the resulting mask to Rd.
+        //
+        // context:  emitter context; Rm is read when CurrOp is an OpCodeSimdReg,
+        //           otherwise Rn is compared against a zero vector.
+        // cond:     comparison requested by the caller.
+        // scalar:   true to use the scalar intrinsic and clear the unused upper bits.
+        // isLeOrLt: true when the caller wants the mirrored relation, i.e.
+        //           GreaterThanOrEqual means "n <= m" and GreaterThan means "n < m".
+        //
+        // The greater-than style relations are emitted with the ordered LT/LE predicates
+        // (1 and 2) and a suitable operand order, rather than NLT/NLE (5 and 6). NLT/NLE
+        // are true for unordered operands, which would set the mask when an input is NaN.
+        private static void EmitCmpSseOrSse2OpF(
+            ArmEmitterContext context,
+            CmpCondition cond,
+            bool scalar,
+            bool isLeOrLt = false)
+        {
+            // Legacy SSE comparison predicates; both are false for unordered operands.
+            const int cmpLessThan        = 1;
+            const int cmpLessThanOrEqual = 2;
+
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = op is OpCodeSimdReg binOp ? GetVec(binOp.Rm) : context.VectorZero();
+
+            int  predicate;
+            bool swapOperands;
+
+            switch (cond)
+            {
+                case CmpCondition.GreaterThanOrEqual:
+                    // n >= m is emitted as m <= n; the mirrored form n <= m needs no swap.
+                    predicate    = cmpLessThanOrEqual;
+                    swapOperands = !isLeOrLt;
+                    break;
+
+                case CmpCondition.GreaterThan:
+                    // n > m is emitted as m < n; the mirrored form n < m needs no swap.
+                    predicate    = cmpLessThan;
+                    swapOperands = !isLeOrLt;
+                    break;
+
+                default:
+                    // Equal: the predicate value is used as-is, operand order as before.
+                    predicate    = (int)cond;
+                    swapOperands = isLeOrLt;
+                    break;
+            }
+
+            Operand lhs = swapOperands ? m : n;
+            Operand rhs = swapOperands ? n : m;
+
+            int sizeF = op.Size & 1;
+
+            if (sizeF == 0)
+            {
+                Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps;
+
+                Operand res = context.AddIntrinsic(inst, lhs, rhs, Const(predicate));
+
+                if (scalar)
+                {
+                    res = context.VectorZeroUpper96(res);
+                }
+                else if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else /* if (sizeF == 1) */
+            {
+                Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd;
+
+                Operand res = context.AddIntrinsic(inst, lhs, rhs, Const(predicate));
+
+                if (scalar)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdCrypto.cs b/ARMeilleure/Instructions/InstEmitSimdCrypto.cs
new file mode 100644
index 000000000..2b61fadac
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdCrypto.cs
@@ -0,0 +1,49 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+    // AES instruction emitters. Each one calls the matching SoftFallback routine
+    // through context.Call and copies the returned vector into Rd.
+    static partial class InstEmit
+    {
+        // Rd = SoftFallback.Decrypt(Rd, Rn).
+        public static void Aesd_V(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            Operand dest   = GetVec(simdOp.Rd);
+            Operand source = GetVec(simdOp.Rn);
+
+            Operand result = context.Call(new _V128_V128_V128(SoftFallback.Decrypt), dest, source);
+
+            context.Copy(dest, result);
+        }
+
+        // Rd = SoftFallback.Encrypt(Rd, Rn).
+        public static void Aese_V(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            Operand dest   = GetVec(simdOp.Rd);
+            Operand source = GetVec(simdOp.Rn);
+
+            Operand result = context.Call(new _V128_V128_V128(SoftFallback.Encrypt), dest, source);
+
+            context.Copy(dest, result);
+        }
+
+        // Rd = SoftFallback.InverseMixColumns(Rn).
+        public static void Aesimc_V(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            Operand source = GetVec(simdOp.Rn);
+            Operand dest   = GetVec(simdOp.Rd);
+
+            Operand result = context.Call(new _V128_V128(SoftFallback.InverseMixColumns), source);
+
+            context.Copy(dest, result);
+        }
+
+        // Rd = SoftFallback.MixColumns(Rn).
+        public static void Aesmc_V(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            Operand source = GetVec(simdOp.Rn);
+            Operand dest   = GetVec(simdOp.Rd);
+
+            Operand result = context.Call(new _V128_V128(SoftFallback.MixColumns), source);
+
+            context.Copy(dest, result);
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/ARMeilleure/Instructions/InstEmitSimdCvt.cs
new file mode 100644
index 000000000..012bfcce2
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt.cs
@@ -0,0 +1,1166 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    using Func1I = Func<Operand, Operand>;
+
+    static partial class InstEmit
+    {
+        // Scalar floating-point precision conversion. The (op.Size, op.Opc) pair selects
+        // the source and destination formats. The converted value is placed in element 0
+        // of Rd; the fallback and half-precision paths build it on top of a zero vector.
+        // Double <-> Half conversions throw NotImplementedException; any other
+        // combination only triggers a debug assertion.
+        public static void Fcvt_S(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            if (op.Size == 0 && op.Opc == 1) // Single -> Double.
+            {
+                if (Optimizations.UseSse2)
+                {
+                    Operand n = GetVec(op.Rn);
+
+                    Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n);
+
+                    context.Copy(GetVec(op.Rd), res);
+                }
+                else
+                {
+                    Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
+
+                    Operand res = context.ConvertToFP(OperandType.FP64, ne);
+
+                    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+                }
+            }
+            else if (op.Size == 1 && op.Opc == 0) // Double -> Single.
+            {
+                if (Optimizations.UseSse2)
+                {
+                    Operand n = GetVec(op.Rn);
+
+                    Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);
+
+                    context.Copy(GetVec(op.Rd), res);
+                }
+                else
+                {
+                    Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
+
+                    Operand res = context.ConvertToFP(OperandType.FP32, ne);
+
+                    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+                }
+            }
+            else if (op.Size == 0 && op.Opc == 3) // Single -> Half.
+            {
+                Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
+
+                Delegate dlg = new _U16_F32(SoftFloat32_16.FPConvert);
+
+                Operand res = context.Call(dlg, ne);
+
+                // The 16-bit result is widened to I64 before being inserted as a 16-bit element.
+                res = context.ZeroExtend16(OperandType.I64, res);
+
+                context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
+            }
+            else if (op.Size == 3 && op.Opc == 0) // Half -> Single.
+            {
+                Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
+
+                Delegate dlg = new _F32_U16(SoftFloat16_32.FPConvert);
+
+                Operand res = context.Call(dlg, ne);
+
+                context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
+            }
+            else if (op.Size == 1 && op.Opc == 3) // Double -> Half.
+            {
+                throw new NotImplementedException("Double-precision to half-precision.");
+            }
+            else if (op.Size == 3 && op.Opc == 1) // Half -> Double.
+            {
+                throw new NotImplementedException("Half-precision to double-precision.");
+            }
+            else // Invalid encoding.
+            {
+                Debug.Assert(false, $"type == {op.Size} && opc == {op.Opc}");
+            }
+        }
+
+        // Signed convert to general-purpose register; the per-value callback rounds with
+        // MidpointRounding.AwayFromZero via EmitRoundMathCall.
+        public static void Fcvtas_Gp(ArmEmitterContext context)
+            => EmitFcvt_s_Gp(context, value => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, value));
+
+        // Unsigned convert to general-purpose register; the per-value callback rounds with
+        // MidpointRounding.AwayFromZero via EmitRoundMathCall.
+        public static void Fcvtau_Gp(ArmEmitterContext context)
+            => EmitFcvt_u_Gp(context, value => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, value));
+
+        // Widening FP convert: Half -> Single (sizeF == 0) or Single -> Double (sizeF == 1).
+        // For the 128-bit register form the source elements are taken from the upper
+        // half of Rn, otherwise from the lower half. Single -> Double has an SSE2 path;
+        // everything else is converted element by element.
+        public static void Fcvtl_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            int sizeF = op.Size & 1;
+
+            bool fromUpperHalf = op.RegisterSize == RegisterSize.Simd128;
+
+            if (Optimizations.UseSse2 && sizeF == 1)
+            {
+                Operand source = GetVec(op.Rn);
+
+                // Movhlps brings the upper 64 bits down into the lower 64 bits.
+                Operand singles = fromUpperHalf
+                    ? context.AddIntrinsic(Intrinsic.X86Movhlps, source, source)
+                    : source;
+
+                Operand doubles = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, singles);
+
+                context.Copy(GetVec(op.Rd), doubles);
+
+                return;
+            }
+
+            Operand result = context.VectorZero();
+
+            int elemCount = 4 >> sizeF;
+
+            // Index of the first source element to read.
+            int firstElem = fromUpperHalf ? elemCount : 0;
+
+            for (int i = 0; i < elemCount; i++)
+            {
+                Operand widened;
+
+                if (sizeF == 0)
+                {
+                    Operand half = EmitVectorExtractZx(context, op.Rn, firstElem + i, 1);
+
+                    Delegate convert = new _F32_U16(SoftFloat16_32.FPConvert);
+
+                    widened = context.Call(convert, half);
+                }
+                else /* if (sizeF == 1) */
+                {
+                    Operand single = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), firstElem + i);
+
+                    widened = context.ConvertToFP(OperandType.FP64, single);
+                }
+
+                result = context.VectorInsert(result, widened, i);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Signed convert to general-purpose register; the per-value callback applies
+        // MathF.Floor / Math.Floor via EmitUnaryMathCall.
+        public static void Fcvtms_Gp(ArmEmitterContext context)
+            => EmitFcvt_s_Gp(context, value => EmitUnaryMathCall(context, MathF.Floor, Math.Floor, value));
+
+        // Unsigned convert to general-purpose register; the per-value callback applies
+        // MathF.Floor / Math.Floor via EmitUnaryMathCall.
+        public static void Fcvtmu_Gp(ArmEmitterContext context)
+            => EmitFcvt_u_Gp(context, value => EmitUnaryMathCall(context, MathF.Floor, Math.Floor, value));
+
+        // Narrowing FP convert: Single -> Half (sizeF == 0) or Double -> Single (sizeF == 1).
+        // For the 128-bit register form the narrowed elements are written to the upper
+        // half of Rd and the lower half of Rd is preserved; otherwise they are written
+        // to the lower half and the upper half is zeroed.
+        // Double -> Single has an SSE2 path; everything else goes element by element.
+        public static void Fcvtn_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            int sizeF = op.Size & 1;
+
+            if (Optimizations.UseSse2 && sizeF == 1)
+            {
+                Operand d = GetVec(op.Rd);
+                Operand n = GetVec(op.Rn);
+
+                // res = { low 64 bits of d, zero }.
+                Operand res = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());
+
+                Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, n);
+
+                // Duplicate the two converted singles into both halves so that either
+                // move below can pick them up.
+                nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
+
+                // Simd128: keep the low half of d and place the result in the upper half.
+                // Simd64:  place the result in the low half and keep the zeroed upper half.
+                Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+                    ? Intrinsic.X86Movlhps
+                    : Intrinsic.X86Movhlps;
+
+                res = context.AddIntrinsic(movInst, res, nInt);
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+                int elems = 4 >> sizeF;
+
+                // Index of the first destination element to write.
+                int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+                Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));
+
+                for (int index = 0; index < elems; index++)
+                {
+                    // Each destination element comes from the source element with the
+                    // same index (previously element 0 was read on every iteration).
+                    Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
+
+                    if (sizeF == 0)
+                    {
+                        Delegate dlg = new _U16_F32(SoftFloat32_16.FPConvert);
+
+                        Operand e = context.Call(dlg, ne);
+
+                        e = context.ZeroExtend16(OperandType.I64, e);
+
+                        res = EmitVectorInsert(context, res, e, part + index, 1);
+                    }
+                    else /* if (sizeF == 1) */
+                    {
+                        Operand e = context.ConvertToFP(OperandType.FP32, ne);
+
+                        res = context.VectorInsert(res, e, part + index);
+                    }
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+        }
+
+        // Scalar signed convert with FPRoundingMode.ToNearest on the SSE4.1 path;
+        // falls back to EmitFcvtn when SSE4.1 is not enabled.
+        public static void Fcvtns_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitFcvtn(context, signed: true, scalar: true);
+
+                return;
+            }
+
+            EmitSse41Fcvts(context, FPRoundingMode.ToNearest, scalar: true);
+        }
+
+        // Vector signed convert with FPRoundingMode.ToNearest on the SSE4.1 path;
+        // falls back to EmitFcvtn when SSE4.1 is not enabled.
+        public static void Fcvtns_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitFcvtn(context, signed: true, scalar: false);
+
+                return;
+            }
+
+            EmitSse41Fcvts(context, FPRoundingMode.ToNearest, scalar: false);
+        }
+
+        // Scalar unsigned convert with FPRoundingMode.ToNearest on the SSE4.1 path;
+        // falls back to EmitFcvtn when SSE4.1 is not enabled.
+        public static void Fcvtnu_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitFcvtn(context, signed: false, scalar: true);
+
+                return;
+            }
+
+            EmitSse41Fcvtu(context, FPRoundingMode.ToNearest, scalar: true);
+        }
+
+        // Vector unsigned convert with FPRoundingMode.ToNearest on the SSE4.1 path;
+        // falls back to EmitFcvtn when SSE4.1 is not enabled.
+        public static void Fcvtnu_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitFcvtn(context, signed: false, scalar: false);
+
+                return;
+            }
+
+            EmitSse41Fcvtu(context, FPRoundingMode.ToNearest, scalar: false);
+        }
+
+        // Signed convert to general-purpose register; the per-value callback applies
+        // MathF.Ceiling / Math.Ceiling via EmitUnaryMathCall.
+        public static void Fcvtps_Gp(ArmEmitterContext context)
+            => EmitFcvt_s_Gp(context, value => EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, value));
+
+        // Unsigned convert to general-purpose register; the per-value callback applies
+        // MathF.Ceiling / Math.Ceiling via EmitUnaryMathCall.
+        public static void Fcvtpu_Gp(ArmEmitterContext context)
+            => EmitFcvt_u_Gp(context, value => EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, value));
+
+        // Signed convert to general-purpose register; the per-value callback is the
+        // identity function (the value is passed through unchanged).
+        public static void Fcvtzs_Gp(ArmEmitterContext context)
+            => EmitFcvt_s_Gp(context, value => value);
+
+        // Public entry point that forwards to the EmitFcvtzs_Gp_Fixed helper.
+        public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
+            => EmitFcvtzs_Gp_Fixed(context);
+
+        // Scalar signed convert with FPRoundingMode.TowardsZero on the SSE4.1 path;
+        // falls back to EmitFcvtz when SSE4.1 is not enabled.
+        public static void Fcvtzs_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitFcvtz(context, signed: true, scalar: true);
+
+                return;
+            }
+
+            EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: true);
+        }
+
+        // Vector signed convert with FPRoundingMode.TowardsZero on the SSE4.1 path;
+        // falls back to EmitFcvtz when SSE4.1 is not enabled.
+        public static void Fcvtzs_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitFcvtz(context, signed: true, scalar: false);
+
+                return;
+            }
+
+            EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: false);
+        }
+
+        // Fixed-point flavour of the vector signed convert. It issues the same helper
+        // calls as Fcvtzs_V; presumably the helpers apply the fixed-point scaling based
+        // on the current opcode — TODO confirm.
+        public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitFcvtz(context, signed: true, scalar: false);
+
+                return;
+            }
+
+            EmitSse41Fcvts(context, FPRoundingMode.TowardsZero, scalar: false);
+        }
+
+        // Unsigned convert to general-purpose register; the per-value callback is the
+        // identity function (the value is passed through unchanged).
+        public static void Fcvtzu_Gp(ArmEmitterContext context)
+            => EmitFcvt_u_Gp(context, value => value);
+
+        // Public entry point that forwards to the EmitFcvtzu_Gp_Fixed helper.
+        public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
+            => EmitFcvtzu_Gp_Fixed(context);
+
+        // Scalar unsigned convert with FPRoundingMode.TowardsZero on the SSE4.1 path;
+        // falls back to EmitFcvtz when SSE4.1 is not enabled.
+        public static void Fcvtzu_S(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitFcvtz(context, signed: false, scalar: true);
+
+                return;
+            }
+
+            EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: true);
+        }
+
+        // Vector unsigned convert with FPRoundingMode.TowardsZero on the SSE4.1 path;
+        // falls back to EmitFcvtz when SSE4.1 is not enabled.
+        public static void Fcvtzu_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitFcvtz(context, signed: false, scalar: false);
+
+                return;
+            }
+
+            EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: false);
+        }
+
+        // Fixed-point flavour of the vector unsigned convert. It issues the same helper
+        // calls as Fcvtzu_V; presumably the helpers apply the fixed-point scaling based
+        // on the current opcode — TODO confirm.
+        public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse41)
+            {
+                EmitFcvtz(context, signed: false, scalar: false);
+
+                return;
+            }
+
+            EmitSse41Fcvtu(context, FPRoundingMode.TowardsZero, scalar: false);
+        }
+
+        // Signed integer (general-purpose register) to floating-point. A 32-bit source
+        // is sign-extended to 64 bits first; the converted value becomes element 0 of
+        // an otherwise zero Rd.
+        public static void Scvtf_Gp(ArmEmitterContext context)
+        {
+            OpCodeSimdCvt cvtOp = (OpCodeSimdCvt)context.CurrOp;
+
+            Operand value = GetIntOrZR(context, cvtOp.Rn);
+
+            bool is32Bit = cvtOp.RegisterSize == RegisterSize.Int32;
+
+            if (is32Bit)
+            {
+                value = context.SignExtend32(OperandType.I64, value);
+            }
+
+            Operand converted = EmitFPConvert(context, value, cvtOp.Size, signed: true);
+
+            Operand dest = GetVec(cvtOp.Rd);
+
+            context.Copy(dest, context.VectorInsert(context.VectorZero(), converted, 0));
+        }
+
+        // Signed fixed-point (general-purpose register) to floating-point. Same as
+        // Scvtf_Gp, with the converted value additionally passed through
+        // EmitI2fFBitsMul using the opcode's FBits.
+        public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
+        {
+            OpCodeSimdCvt cvtOp = (OpCodeSimdCvt)context.CurrOp;
+
+            Operand value = GetIntOrZR(context, cvtOp.Rn);
+
+            bool is32Bit = cvtOp.RegisterSize == RegisterSize.Int32;
+
+            if (is32Bit)
+            {
+                value = context.SignExtend32(OperandType.I64, value);
+            }
+
+            Operand converted = EmitFPConvert(context, value, cvtOp.Size, signed: true);
+
+            Operand scaled = EmitI2fFBitsMul(context, converted, cvtOp.FBits);
+
+            Operand dest = GetVec(cvtOp.Rd);
+
+            context.Copy(dest, context.VectorInsert(context.VectorZero(), scaled, 0));
+        }
+
+        // Scalar signed integer to floating-point. The SSE2 helper is used for the
+        // sizeF == 0 case; otherwise element 0 of Rn is extracted, converted with
+        // EmitFPConvert and stored as element 0 of an otherwise zero Rd.
+        public static void Scvtf_S(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            int sizeF = op.Size & 1;
+
+            if (!Optimizations.UseSse2 || sizeF != 0)
+            {
+                Operand source = EmitVectorLongExtract(context, op.Rn, 0, sizeF + 2);
+
+                Operand converted = EmitFPConvert(context, source, op.Size, signed: true);
+
+                Operand dest = GetVec(op.Rd);
+
+                context.Copy(dest, context.VectorInsert(context.VectorZero(), converted, 0));
+
+                return;
+            }
+
+            EmitSse2Scvtf(context, scalar: true);
+        }
+
+        // Vector signed integer to floating-point. The SSE2 helper is used for the
+        // sizeF == 0 case; EmitVectorCvtf handles everything else.
+        public static void Scvtf_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            int sizeF = op.Size & 1;
+
+            if (!Optimizations.UseSse2 || sizeF != 0)
+            {
+                EmitVectorCvtf(context, signed: true);
+
+                return;
+            }
+
+            EmitSse2Scvtf(context, scalar: false);
+        }
+
+        // Fixed-point flavour of the vector signed integer to floating-point convert.
+        // It issues the same helper calls as the non-fixed vector form.
+        public static void Scvtf_V_Fixed(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            // sizeF == ((OpCodeSimdShImm64)op).Size - 2
+            int sizeF = op.Size & 1;
+
+            if (!Optimizations.UseSse2 || sizeF != 0)
+            {
+                EmitVectorCvtf(context, signed: true);
+
+                return;
+            }
+
+            EmitSse2Scvtf(context, scalar: false);
+        }
+
+        // Unsigned integer (general-purpose register) to floating-point. The converted
+        // value becomes element 0 of an otherwise zero Rd.
+        public static void Ucvtf_Gp(ArmEmitterContext context)
+        {
+            OpCodeSimdCvt cvtOp = (OpCodeSimdCvt)context.CurrOp;
+
+            Operand source = GetIntOrZR(context, cvtOp.Rn);
+
+            Operand converted = EmitFPConvert(context, source, cvtOp.Size, signed: false);
+
+            Operand dest = GetVec(cvtOp.Rd);
+
+            context.Copy(dest, context.VectorInsert(context.VectorZero(), converted, 0));
+        }
+
+        // Unsigned fixed-point (general-purpose register) to floating-point. Same as
+        // Ucvtf_Gp, with the converted value additionally passed through
+        // EmitI2fFBitsMul using the opcode's FBits.
+        public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
+        {
+            OpCodeSimdCvt cvtOp = (OpCodeSimdCvt)context.CurrOp;
+
+            Operand source = GetIntOrZR(context, cvtOp.Rn);
+
+            Operand converted = EmitFPConvert(context, source, cvtOp.Size, signed: false);
+
+            Operand scaled = EmitI2fFBitsMul(context, converted, cvtOp.FBits);
+
+            Operand dest = GetVec(cvtOp.Rd);
+
+            context.Copy(dest, context.VectorInsert(context.VectorZero(), scaled, 0));
+        }
+
+        // UCVTF (scalar, integer): converts the unsigned integer held in element 0 of Rn to
+        // floating point. The result is placed in element 0 of Rd and the remaining elements
+        // of Rd are zeroed.
+        public static void Ucvtf_S(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            int sizeF = op.Size & 1;
+
+            if (Optimizations.UseSse2 && sizeF == 0)
+            {
+                EmitSse2Ucvtf(context, scalar: true);
+
+                return;
+            }
+
+            // sizeF + 2 is the integer element size (2 -> 32 bits, 3 -> 64 bits).
+            Operand source = EmitVectorLongExtract(context, op.Rn, 0, sizeF + 2);
+
+            Operand converted = EmitFPConvert(context, source, sizeF, signed: false);
+
+            Operand dest = GetVec(op.Rd);
+
+            Operand vector = context.VectorInsert(context.VectorZero(), converted, 0);
+
+            context.Copy(dest, vector);
+        }
+
+        // UCVTF (vector, integer): unsigned integer to floating-point conversion of the elements
+        // of Rn. Single precision uses the SSE2 path when enabled; everything else goes through
+        // the generic per-element helper.
+        public static void Ucvtf_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            bool isSingle = (op.Size & 1) == 0;
+
+            if (Optimizations.UseSse2 && isSingle)
+            {
+                EmitSse2Ucvtf(context, scalar: false);
+
+                return;
+            }
+
+            EmitVectorCvtf(context, signed: false);
+        }
+
+        // UCVTF (vector, fixed-point): unsigned fixed-point to floating-point conversion of the
+        // elements of Rn. The scaling by the fraction bits is applied inside the helpers called
+        // here, which detect the shift-immediate opcode themselves.
+        public static void Ucvtf_V_Fixed(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            // (op.Size & 1) == ((OpCodeSimdShImm)op).Size - 2
+            bool isDouble = (op.Size & 1) != 0;
+
+            if (!Optimizations.UseSse2 || isDouble)
+            {
+                EmitVectorCvtf(context, signed: false);
+            }
+            else
+            {
+                EmitSse2Ucvtf(context, scalar: false);
+            }
+        }
+
+        // Emits a floating-point to integer conversion that first rounds each element with
+        // MidpointRounding.ToEven and then converts it through the saturating SoftFallback helpers.
+        //   signed: selects the signed or unsigned saturating conversion.
+        //   scalar: when true only element 0 is converted; the rest of Rd stays zero.
+        private static void EmitFcvtn(ArmEmitterContext context, bool signed, bool scalar)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            Operand source = GetVec(op.Rn);
+
+            int sizeF = op.Size & 1;
+            int sizeI = sizeF + 2;
+
+            bool isDouble = sizeF != 0;
+
+            OperandType fpType = isDouble ? OperandType.FP64 : OperandType.FP32;
+
+            int count = scalar ? 1 : op.GetBytesCount() >> sizeI;
+
+            for (int lane = 0; lane < count; lane++)
+            {
+                Operand element = context.VectorExtract(fpType, source, lane);
+
+                Operand rounded = EmitRoundMathCall(context, MidpointRounding.ToEven, element);
+
+                Operand converted;
+
+                if (isDouble)
+                {
+                    Delegate dlg = signed
+                        ? (Delegate)new _S64_F64(SoftFallback.SatF64ToS64)
+                        : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64);
+
+                    converted = context.Call(dlg, rounded);
+                }
+                else
+                {
+                    Delegate dlg = signed
+                        ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32)
+                        : (Delegate)new _U32_F32(SoftFallback.SatF32ToU32);
+
+                    // The 32-bit result is widened to 64 bits before it is inserted.
+                    converted = context.ZeroExtend32(OperandType.I64, context.Call(dlg, rounded));
+                }
+
+                result = EmitVectorInsert(context, result, converted, lane, sizeI);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Emits a floating-point to integer conversion through the saturating SoftFallback
+        // helpers, multiplying each element by 2^fBits first when the opcode carries fraction
+        // bits (see GetFBits).
+        //   signed: selects the signed or unsigned saturating conversion.
+        //   scalar: when true only element 0 is converted; the rest of Rd stays zero.
+        private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            Operand source = GetVec(op.Rn);
+
+            int sizeF = op.Size & 1;
+            int sizeI = sizeF + 2;
+
+            bool isDouble = sizeF != 0;
+
+            OperandType fpType = isDouble ? OperandType.FP64 : OperandType.FP32;
+
+            int fBits = GetFBits(context);
+
+            int count = scalar ? 1 : op.GetBytesCount() >> sizeI;
+
+            for (int lane = 0; lane < count; lane++)
+            {
+                Operand element = context.VectorExtract(fpType, source, lane);
+
+                Operand scaled = EmitF2iFBitsMul(context, element, fBits);
+
+                Operand converted;
+
+                if (isDouble)
+                {
+                    Delegate dlg = signed
+                        ? (Delegate)new _S64_F64(SoftFallback.SatF64ToS64)
+                        : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64);
+
+                    converted = context.Call(dlg, scaled);
+                }
+                else
+                {
+                    Delegate dlg = signed
+                        ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32)
+                        : (Delegate)new _U32_F32(SoftFallback.SatF32ToU32);
+
+                    // The 32-bit result is widened to 64 bits before it is inserted.
+                    converted = context.ZeroExtend32(OperandType.I64, context.Call(dlg, scaled));
+                }
+
+                result = EmitVectorInsert(context, result, converted, lane, sizeI);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Signed variant of EmitFcvt___Gp: "emit" is applied to the source element before the
+        // saturating signed conversion into a general-purpose register.
+        private static void EmitFcvt_s_Gp(ArmEmitterContext context, Func1I emit)
+            => EmitFcvt___Gp(context, emit, signed: true);
+
+        // Unsigned variant of EmitFcvt___Gp: "emit" is applied to the source element before the
+        // saturating unsigned conversion into a general-purpose register.
+        private static void EmitFcvt_u_Gp(ArmEmitterContext context, Func1I emit)
+            => EmitFcvt___Gp(context, emit, signed: false);
+
+        // Converts element 0 of Rn (FP32 when op.Size is 0, FP64 otherwise) to an integer in the
+        // general-purpose register Rd (or ZR). The element is first passed through "emit"
+        // and then converted with no fraction bits.
+        private static void EmitFcvt___Gp(ArmEmitterContext context, Func1I emit, bool signed)
+        {
+            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+            OperandType fpType = op.Size != 0 ? OperandType.FP64 : OperandType.FP32;
+
+            Operand element = context.VectorExtract(fpType, GetVec(op.Rn), 0);
+
+            Operand prepared = emit(element);
+
+            Operand result;
+
+            if (signed)
+            {
+                result = EmitScalarFcvts(context, prepared, 0);
+            }
+            else
+            {
+                result = EmitScalarFcvtu(context, prepared, 0);
+            }
+
+            SetIntOrZR(context, op.Rd, result);
+        }
+
+        // Signed variant of EmitFcvtz__Gp_Fixed.
+        private static void EmitFcvtzs_Gp_Fixed(ArmEmitterContext context)
+            => EmitFcvtz__Gp_Fixed(context, signed: true);
+
+        // Unsigned variant of EmitFcvtz__Gp_Fixed.
+        private static void EmitFcvtzu_Gp_Fixed(ArmEmitterContext context)
+            => EmitFcvtz__Gp_Fixed(context, signed: false);
+
+        // Converts element 0 of Rn (FP32 when op.Size is 0, FP64 otherwise) to a fixed-point
+        // integer with op.FBits fraction bits, stored in the general-purpose register Rd (or ZR).
+        private static void EmitFcvtz__Gp_Fixed(ArmEmitterContext context, bool signed)
+        {
+            OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
+
+            OperandType fpType = op.Size != 0 ? OperandType.FP64 : OperandType.FP32;
+
+            Operand element = context.VectorExtract(fpType, GetVec(op.Rn), 0);
+
+            Operand result;
+
+            if (signed)
+            {
+                result = EmitScalarFcvts(context, element, op.FBits);
+            }
+            else
+            {
+                result = EmitScalarFcvtu(context, element, op.FBits);
+            }
+
+            SetIntOrZR(context, op.Rd, result);
+        }
+
+        // Generic integer to floating-point vector conversion: each integer element of Rn is
+        // converted (signed or unsigned), scaled down by 2^fBits when the opcode carries
+        // fraction bits, and inserted into a vector that starts out zeroed.
+        private static void EmitVectorCvtf(ArmEmitterContext context, bool signed)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int sizeF = op.Size & 1;
+            int sizeI = sizeF + 2;
+
+            int fBits = GetFBits(context);
+
+            int count = op.GetBytesCount() >> sizeI;
+
+            int lane = 0;
+
+            while (lane < count)
+            {
+                Operand integer = EmitVectorLongExtract(context, op.Rn, lane, sizeI);
+
+                Operand converted = EmitFPConvert(context, integer, sizeF, signed);
+
+                Operand scaled = EmitI2fFBitsMul(context, converted, fBits);
+
+                result = context.VectorInsert(result, scaled, lane);
+
+                lane++;
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Returns the fraction bit count of the current opcode: GetImmShr for shift-immediate
+        // opcodes, zero for every other opcode type.
+        private static int GetFBits(ArmEmitterContext context)
+        {
+            return context.CurrOp is OpCodeSimdShImm shImmOp ? GetImmShr(shImmOp) : 0;
+        }
+
+        // Converts an I32/I64 operand to floating point.
+        //   size:   0 produces FP32, 1 produces FP64 (asserted to be one of the two).
+        //   signed: true uses ConvertToFP, false uses ConvertToFPUI.
+        private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, int size, bool signed)
+        {
+            Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
+            Debug.Assert((uint)size < 2);
+
+            OperandType fpType = size != 0 ? OperandType.FP64 : OperandType.FP32;
+
+            return signed
+                ? context.ConvertToFP(fpType, value)
+                : context.ConvertToFPUI(fpType, value);
+        }
+
+        // Converts an FP32/FP64 value to a signed integer through the saturating SoftFallback
+        // helpers, after multiplying it by 2^fBits. The integer width follows the register
+        // size of the current opcode (Int32 -> 32 bits, otherwise 64 bits).
+        private static Operand EmitScalarFcvts(ArmEmitterContext context, Operand value, int fBits)
+        {
+            Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+            value = EmitF2iFBitsMul(context, value, fBits);
+
+            bool is32BitsResult = context.CurrOp.RegisterSize == RegisterSize.Int32;
+
+            bool isSingle = value.Type == OperandType.FP32;
+
+            Delegate dlg;
+
+            if (is32BitsResult)
+            {
+                dlg = isSingle
+                    ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32)
+                    : (Delegate)new _S32_F64(SoftFallback.SatF64ToS32);
+            }
+            else
+            {
+                dlg = isSingle
+                    ? (Delegate)new _S64_F32(SoftFallback.SatF32ToS64)
+                    : (Delegate)new _S64_F64(SoftFallback.SatF64ToS64);
+            }
+
+            return context.Call(dlg, value);
+        }
+
+        // Converts an FP32/FP64 value to an unsigned integer through the saturating SoftFallback
+        // helpers, after multiplying it by 2^fBits. The integer width follows the register
+        // size of the current opcode (Int32 -> 32 bits, otherwise 64 bits).
+        private static Operand EmitScalarFcvtu(ArmEmitterContext context, Operand value, int fBits)
+        {
+            Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+            value = EmitF2iFBitsMul(context, value, fBits);
+
+            bool is32BitsResult = context.CurrOp.RegisterSize == RegisterSize.Int32;
+
+            bool isSingle = value.Type == OperandType.FP32;
+
+            Delegate dlg;
+
+            if (is32BitsResult)
+            {
+                dlg = isSingle
+                    ? (Delegate)new _U32_F32(SoftFallback.SatF32ToU32)
+                    : (Delegate)new _U32_F64(SoftFallback.SatF64ToU32);
+            }
+            else
+            {
+                dlg = isSingle
+                    ? (Delegate)new _U64_F32(SoftFallback.SatF32ToU64)
+                    : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64);
+            }
+
+            return context.Call(dlg, value);
+        }
+
+        // Float-to-integer fixed-point scaling: multiplies an FP32/FP64 value by 2^fBits.
+        // With zero fraction bits the value is returned untouched and nothing is emitted.
+        private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits)
+        {
+            Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+            if (fBits != 0)
+            {
+                // The scale constant matches the precision of the value being scaled.
+                return value.Type == OperandType.FP32
+                    ? context.Multiply(value, ConstF(MathF.Pow(2f, fBits)))
+                    : context.Multiply(value, ConstF(Math.Pow(2d, fBits)));
+            }
+
+            return value;
+        }
+
+        // Integer-to-float fixed-point scaling: multiplies an FP32/FP64 value by 1 / 2^fBits.
+        // With zero fraction bits the value is returned untouched and nothing is emitted.
+        private static Operand EmitI2fFBitsMul(ArmEmitterContext context, Operand value, int fBits)
+        {
+            Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
+
+            if (fBits != 0)
+            {
+                // The scale constant matches the precision of the value being scaled.
+                return value.Type == OperandType.FP32
+                    ? context.Multiply(value, ConstF(1f / MathF.Pow(2f, fBits)))
+                    : context.Multiply(value, ConstF(1d / Math.Pow(2d, fBits)));
+            }
+
+            return value;
+        }
+
+        // Emits a floating-point to signed integer conversion of Rn into Rd using x86 intrinsics
+        // (ROUNDPS/ROUNDPD plus CVTPS2DQ or CVTSD2SI), for single (sizeF == 0) or double
+        // (sizeF == 1) precision elements.
+        //   roundMode: rounding mode applied before the integer conversion.
+        //   scalar:    when true only the lowest element is kept; the rest of Rd is zeroed.
+        // When the current opcode is an OpCodeSimdShImm, the input is first multiplied by 2^fBits.
+        private static void EmitSse41Fcvts(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            // Comparison predicate immediates for CMPPS/CMPPD
+            // (x86 encodings: 5 is "not less than", 7 is "ordered").
+            const int cmpGreaterThanOrEqual = 5;
+            const int cmpOrdered            = 7;
+
+            // sizeF == ((OpCodeSimdShImm64)op).Size - 2
+            int sizeF = op.Size & 1;
+
+            if (sizeF == 0)
+            {
+                // Comparing n with itself as "ordered" sets all bits only on lanes that are not NaN,
+                // so the AND below replaces NaN lanes with zero.
+                Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const(cmpOrdered));
+
+                Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
+
+                if (op is OpCodeSimdShImm fixedOp)
+                {
+                    int fBits = GetImmShr(fixedOp);
+
+                    // Adding fBits to the exponent field of 1.0f (0x3F800000) builds 2^fBits directly.
+                    // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
+                    int fpScaled = 0x3F800000 + fBits * 0x800000;
+
+                    Operand scale = X86GetAllElements(context, fpScaled);
+
+                    nScaled = context.AddIntrinsic(Intrinsic.X86Mulps, nScaled, scale);
+                }
+
+                Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundps, nScaled, Const(X86GetRoundControl(roundMode)));
+
+                Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRnd);
+
+                Operand mask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
+
+                // Lanes >= 2^31 do not fit in a signed 32-bit integer. Per the x86 definition the
+                // conversion returns 0x80000000 for them; XOR with the all-ones compare result
+                // turns that into 0x7FFFFFFF (positive saturation).
+                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, mask, Const(cmpGreaterThanOrEqual));
+
+                Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2);
+
+                if (scalar)
+                {
+                    res = context.VectorZeroUpper96(res);
+                }
+                else if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else /* if (sizeF == 1) */
+            {
+                // Same NaN removal as the single precision path, on 64-bit lanes.
+                Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const(cmpOrdered));
+
+                Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
+
+                if (op is OpCodeSimdShImm fixedOp)
+                {
+                    int fBits = GetImmShr(fixedOp);
+
+                    // Adding fBits to the exponent field of 1.0d (0x3FF0000000000000) builds 2^fBits directly.
+                    // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
+                    long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
+
+                    Operand scale = X86GetAllElements(context, fpScaled);
+
+                    nScaled = context.AddIntrinsic(Intrinsic.X86Mulpd, nScaled, scale);
+                }
+
+                Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundpd, nScaled, Const(X86GetRoundControl(roundMode)));
+
+                // The 64-bit conversion is done one lane at a time with CVTSD2SI, which reads the
+                // low lane; the high lane is moved down with UNPCKHPD first. For the scalar form
+                // the high half is simply a constant zero.
+                Operand high;
+
+                if (!scalar)
+                {
+                    high = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nRnd, nRnd);
+                    high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high);
+                }
+                else
+                {
+                    high = Const(0L);
+                }
+
+                Operand low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRnd);
+
+                Operand nInt = EmitVectorLongCreate(context, low, high);
+
+                Operand mask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
+
+                // Positive saturation for lanes >= 2^63, same XOR trick as the 32-bit path.
+                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, mask, Const(cmpGreaterThanOrEqual));
+
+                Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2);
+
+                if (scalar)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+        }
+
+        // Emits a floating-point to unsigned integer conversion of Rn into Rd using x86 intrinsics,
+        // for single (sizeF == 0) or double (sizeF == 1) precision elements.
+        //   roundMode: rounding mode applied before the integer conversion.
+        //   scalar:    when true only the lowest element is kept; the rest of Rd is zeroed.
+        // When the current opcode is an OpCodeSimdShImm, the input is first multiplied by 2^fBits.
+        //
+        // Only signed conversions are used, so the unsigned result is built as the sum of two
+        // signed conversions: one of the (non-negative) value itself and one of the value minus
+        // 2^31 (or 2^63 for doubles), each with its own out-of-range handling.
+        private static void EmitSse41Fcvtu(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            // Comparison predicate immediates for CMPPS/CMPPD
+            // (x86 encodings: 5 is "not less than", 6 is "not less or equal", 7 is "ordered").
+            const int cmpGreaterThanOrEqual = 5;
+            const int cmpGreaterThan        = 6;
+            const int cmpOrdered            = 7;
+
+            // sizeF == ((OpCodeSimdShImm)op).Size - 2
+            int sizeF = op.Size & 1;
+
+            if (sizeF == 0)
+            {
+                // Comparing n with itself as "ordered" sets all bits only on lanes that are not NaN,
+                // so the AND below replaces NaN lanes with zero.
+                Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const(cmpOrdered));
+
+                Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
+
+                if (op is OpCodeSimdShImm fixedOp)
+                {
+                    int fBits = GetImmShr(fixedOp);
+
+                    // Adding fBits to the exponent field of 1.0f (0x3F800000) builds 2^fBits directly.
+                    // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
+                    int fpScaled = 0x3F800000 + fBits * 0x800000;
+
+                    Operand scale = X86GetAllElements(context, fpScaled);
+
+                    nScaled = context.AddIntrinsic(Intrinsic.X86Mulps, nScaled, scale);
+                }
+
+                Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundps, nScaled, Const(X86GetRoundControl(roundMode)));
+
+                // Keep only lanes greater than zero: negative inputs convert to 0.
+                Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, context.VectorZero(), Const(cmpGreaterThan));
+
+                Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask);
+
+                // First partial result: exact for lanes below 2^31; per the x86 definition the
+                // conversion returns 0x80000000 for lanes at or above 2^31.
+                Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRndMasked);
+
+                Operand mask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
+
+                // Second partial result: the amount above 2^31, clamped at zero from below.
+                Operand res = context.AddIntrinsic(Intrinsic.X86Subps, nRndMasked, mask);
+
+                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, res, context.VectorZero(), Const(cmpGreaterThan));
+
+                Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2);
+
+                res = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, resMasked);
+
+                // If the remainder is itself >= 2^31 the input was >= 2^32: the XOR turns the
+                // 0x80000000 conversion result into 0x7FFFFFFF, so the sum below is 0xFFFFFFFF.
+                Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmpps, resMasked, mask, Const(cmpGreaterThanOrEqual));
+
+                res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3);
+                res = context.AddIntrinsic(Intrinsic.X86Paddd, res, nInt);
+
+                if (scalar)
+                {
+                    res = context.VectorZeroUpper96(res);
+                }
+                else if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else /* if (sizeF == 1) */
+            {
+                // Same sequence as the single precision path, on 64-bit lanes and with 2^63 as the split point.
+                Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const(cmpOrdered));
+
+                Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
+
+                if (op is OpCodeSimdShImm fixedOp)
+                {
+                    int fBits = GetImmShr(fixedOp);
+
+                    // Adding fBits to the exponent field of 1.0d (0x3FF0000000000000) builds 2^fBits directly.
+                    // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
+                    long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
+
+                    Operand scale = X86GetAllElements(context, fpScaled);
+
+                    nScaled = context.AddIntrinsic(Intrinsic.X86Mulpd, nScaled, scale);
+                }
+
+                Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundpd, nScaled, Const(X86GetRoundControl(roundMode)));
+
+                // Keep only lanes greater than zero: negative inputs convert to 0.
+                Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, context.VectorZero(), Const(cmpGreaterThan));
+
+                Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask);
+
+                // The 64-bit conversion is done one lane at a time with CVTSD2SI, which reads the
+                // low lane; the high lane is moved down with UNPCKHPD first. For the scalar form
+                // the high half stays a constant zero (it is not reassigned further down either).
+                Operand high;
+
+                if (!scalar)
+                {
+                    high = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nRndMasked, nRndMasked);
+                    high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high);
+                }
+                else
+                {
+                    high = Const(0L);
+                }
+
+                Operand low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRndMasked);
+
+                // First partial result (value itself).
+                Operand nInt = EmitVectorLongCreate(context, low, high);
+
+                Operand mask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
+
+                // Second partial result: the amount above 2^63, clamped at zero from below.
+                Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, nRndMasked, mask);
+
+                Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, res, context.VectorZero(), Const(cmpGreaterThan));
+
+                Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2);
+
+                if (!scalar)
+                {
+                    high = context.AddIntrinsic(Intrinsic.X86Unpckhpd, resMasked, resMasked);
+                    high = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, high);
+                }
+
+                low = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, resMasked);
+
+                res = EmitVectorLongCreate(context, low, high);
+
+                // Saturate when the remainder is itself >= 2^63 (input >= 2^64), then add both parts.
+                Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmppd, resMasked, mask, Const(cmpGreaterThanOrEqual));
+
+                res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3);
+                res = context.AddIntrinsic(Intrinsic.X86Paddq, res, nInt);
+
+                if (scalar)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+        }
+
+        // Emits a signed 32-bit integer to single precision conversion of Rn into Rd with
+        // CVTDQ2PS. For shift-immediate (fixed-point) opcodes the result is multiplied by
+        // 2^-fBits. The upper 96 bits (scalar) or upper 64 bits (Simd64) of the result are zeroed.
+        private static void EmitSse2Scvtf(ArmEmitterContext context, bool scalar)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand converted = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, GetVec(op.Rn));
+
+            if (op is OpCodeSimdShImm shImmOp)
+            {
+                // Subtracting fBits from the exponent field of 1.0f (0x3F800000) builds 2^-fBits:
+                // BitConverter.Int32BitsToSingle(scaleBits) == 1f / MathF.Pow(2f, fBits)
+                int scaleBits = 0x3F800000 - GetImmShr(shImmOp) * 0x800000;
+
+                converted = context.AddIntrinsic(Intrinsic.X86Mulps, converted, X86GetAllElements(context, scaleBits));
+            }
+
+            if (!scalar)
+            {
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    converted = context.VectorZeroUpper64(converted);
+                }
+            }
+            else
+            {
+                converted = context.VectorZeroUpper96(converted);
+            }
+
+            context.Copy(GetVec(op.Rd), converted);
+        }
+
+        // Emits an unsigned 32-bit integer to single precision conversion of Rn into Rd.
+        // CVTDQ2PS treats its input as signed, so each element is split into its upper and lower
+        // 16 bits (both small enough to be non-negative as signed values), the halves are
+        // converted separately and recombined as upper * 65536 + lower.
+        // For shift-immediate (fixed-point) opcodes the result is then multiplied by 2^-fBits.
+        //   scalar: when true only the lowest element is kept; the rest of Rd is zeroed.
+        private static void EmitSse2Ucvtf(ArmEmitterContext context, bool scalar)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+
+            // Upper 16 bits of every element, converted and scaled back up by 2^16.
+            Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
+
+            res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
+
+            Operand mask = X86GetAllElements(context, 0x47800000); // 65536.0f (1 << 16)
+
+            res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
+
+            // Lower 16 bits of every element: shifting left then right clears the upper half.
+            Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
+
+            res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
+            res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
+
+            res = context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
+
+            if (op is OpCodeSimdShImm fixedOp)
+            {
+                int fBits = GetImmShr(fixedOp);
+
+                // Subtracting fBits from the exponent field of 1.0f (0x3F800000) builds 2^-fBits directly.
+                // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
+                int fpScaled = 0x3F800000 - fBits * 0x800000;
+
+                Operand scale = X86GetAllElements(context, fpScaled);
+
+                res = context.AddIntrinsic(Intrinsic.X86Mulps, res, scale);
+            }
+
+            if (scalar)
+            {
+                res = context.VectorZeroUpper96(res);
+            }
+            else if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                res = context.VectorZeroUpper64(res);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        // Extracts the integer element at "index" from vector register "reg".
+        // The element is read as I64 when size is 3 and as I32 for any other size.
+        private static Operand EmitVectorLongExtract(ArmEmitterContext context, int reg, int index, int size)
+        {
+            OperandType elemType = size != 3 ? OperandType.I32 : OperandType.I64;
+
+            Operand vector = GetVec(reg);
+
+            return context.VectorExtract(elemType, vector, index);
+        }
+
+        // Builds a vector from two values: "low" becomes element 0 (through VectorCreateScalar)
+        // and "high" is inserted as element 1.
+        private static Operand EmitVectorLongCreate(ArmEmitterContext context, Operand low, Operand high)
+        {
+            return context.VectorInsert(context.VectorCreateScalar(low), high, 1);
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdHash.cs b/ARMeilleure/Instructions/InstEmitSimdHash.cs
new file mode 100644
index 000000000..4ed960612
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdHash.cs
@@ -0,0 +1,147 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+    // SHA-1 and SHA-256 instruction emitters. Every instruction here is implemented as a call
+    // into the matching SoftFallback routine, with the result copied into Rd.
+    static partial class InstEmit
+    {
+#region "Sha1"
+        // SHA1C: calls SoftFallback.HashChoose with Rd, the 32-bit element 0 of Rn, and Rm.
+        public static void Sha1c_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand rd = GetVec(op.Rd);
+            Operand rnLow32 = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+            Operand rm = GetVec(op.Rm);
+
+            Operand hashed = context.Call(new _V128_V128_U32_V128(SoftFallback.HashChoose), rd, rnLow32, rm);
+
+            context.Copy(GetVec(op.Rd), hashed);
+        }
+
+        // SHA1H: calls SoftFallback.FixedRotate on the 32-bit element 0 of Rn and stores the
+        // result in Rd as a scalar vector.
+        public static void Sha1h_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand rnLow32 = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+
+            Operand rotated = context.Call(new _U32_U32(SoftFallback.FixedRotate), rnLow32);
+
+            Operand rd = GetVec(op.Rd);
+
+            context.Copy(rd, context.VectorCreateScalar(rotated));
+        }
+
+        // SHA1M: calls SoftFallback.HashMajority with Rd, the 32-bit element 0 of Rn, and Rm.
+        public static void Sha1m_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand rd = GetVec(op.Rd);
+            Operand rnLow32 = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+            Operand rm = GetVec(op.Rm);
+
+            Operand hashed = context.Call(new _V128_V128_U32_V128(SoftFallback.HashMajority), rd, rnLow32, rm);
+
+            context.Copy(GetVec(op.Rd), hashed);
+        }
+
+        // SHA1P: calls SoftFallback.HashParity with Rd, the 32-bit element 0 of Rn, and Rm.
+        public static void Sha1p_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand rd = GetVec(op.Rd);
+            Operand rnLow32 = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
+            Operand rm = GetVec(op.Rm);
+
+            Operand hashed = context.Call(new _V128_V128_U32_V128(SoftFallback.HashParity), rd, rnLow32, rm);
+
+            context.Copy(GetVec(op.Rd), hashed);
+        }
+
+        // SHA1SU0: calls SoftFallback.Sha1SchedulePart1 with Rd, Rn and Rm.
+        public static void Sha1su0_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand rd = GetVec(op.Rd);
+            Operand rn = GetVec(op.Rn);
+            Operand rm = GetVec(op.Rm);
+
+            Operand scheduled = context.Call(new _V128_V128_V128_V128(SoftFallback.Sha1SchedulePart1), rd, rn, rm);
+
+            context.Copy(GetVec(op.Rd), scheduled);
+        }
+
+        // SHA1SU1: calls SoftFallback.Sha1SchedulePart2 with Rd and Rn.
+        public static void Sha1su1_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand rd = GetVec(op.Rd);
+            Operand rn = GetVec(op.Rn);
+
+            Operand scheduled = context.Call(new _V128_V128_V128(SoftFallback.Sha1SchedulePart2), rd, rn);
+
+            context.Copy(GetVec(op.Rd), scheduled);
+        }
+#endregion
+
+#region "Sha256"
+        // SHA256H: calls SoftFallback.HashLower with Rd, Rn and Rm.
+        public static void Sha256h_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand rd = GetVec(op.Rd);
+            Operand rn = GetVec(op.Rn);
+            Operand rm = GetVec(op.Rm);
+
+            Operand hashed = context.Call(new _V128_V128_V128_V128(SoftFallback.HashLower), rd, rn, rm);
+
+            context.Copy(GetVec(op.Rd), hashed);
+        }
+
+        // SHA256H2: calls SoftFallback.HashUpper with Rd, Rn and Rm.
+        public static void Sha256h2_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand rd = GetVec(op.Rd);
+            Operand rn = GetVec(op.Rn);
+            Operand rm = GetVec(op.Rm);
+
+            Operand hashed = context.Call(new _V128_V128_V128_V128(SoftFallback.HashUpper), rd, rn, rm);
+
+            context.Copy(GetVec(op.Rd), hashed);
+        }
+
+        // SHA256SU0: calls SoftFallback.Sha256SchedulePart1 with Rd and Rn.
+        public static void Sha256su0_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand rd = GetVec(op.Rd);
+            Operand rn = GetVec(op.Rn);
+
+            Operand scheduled = context.Call(new _V128_V128_V128(SoftFallback.Sha256SchedulePart1), rd, rn);
+
+            context.Copy(GetVec(op.Rd), scheduled);
+        }
+
+        // SHA256SU1: calls SoftFallback.Sha256SchedulePart2 with Rd, Rn and Rm.
+        public static void Sha256su1_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand rd = GetVec(op.Rd);
+            Operand rn = GetVec(op.Rn);
+            Operand rm = GetVec(op.Rm);
+
+            Operand scheduled = context.Call(new _V128_V128_V128_V128(SoftFallback.Sha256SchedulePart2), rd, rn, rm);
+
+            context.Copy(GetVec(op.Rd), scheduled);
+        }
+#endregion
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
new file mode 100644
index 000000000..a3da80fb0
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
@@ -0,0 +1,1477 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    using Func1I = Func<Operand, Operand>;
+    using Func2I = Func<Operand, Operand, Operand>;
+    using Func3I = Func<Operand, Operand, Operand, Operand>;
+
+    static class InstEmitSimdHelper
+    {
+#region "X86 SSE Intrinsics"
+        public static readonly Intrinsic[] X86PaddInstruction =
+        {
+            Intrinsic.X86Paddb,
+            Intrinsic.X86Paddw,
+            Intrinsic.X86Paddd,
+            Intrinsic.X86Paddq,
+        };
+
+        public static readonly Intrinsic[] X86PcmpeqInstruction =
+        {
+            Intrinsic.X86Pcmpeqb,
+            Intrinsic.X86Pcmpeqw,
+            Intrinsic.X86Pcmpeqd,
+            Intrinsic.X86Pcmpeqq,
+        };
+
+        public static readonly Intrinsic[] X86PcmpgtInstruction =
+        {
+            Intrinsic.X86Pcmpgtb,
+            Intrinsic.X86Pcmpgtw,
+            Intrinsic.X86Pcmpgtd,
+            Intrinsic.X86Pcmpgtq,
+        };
+
+        public static readonly Intrinsic[] X86PmaxsInstruction =
+        {
+            Intrinsic.X86Pmaxsb,
+            Intrinsic.X86Pmaxsw,
+            Intrinsic.X86Pmaxsd,
+        };
+
+        public static readonly Intrinsic[] X86PmaxuInstruction =
+        {
+            Intrinsic.X86Pmaxub,
+            Intrinsic.X86Pmaxuw,
+            Intrinsic.X86Pmaxud,
+        };
+
+        public static readonly Intrinsic[] X86PminsInstruction =
+        {
+            Intrinsic.X86Pminsb,
+            Intrinsic.X86Pminsw,
+            Intrinsic.X86Pminsd,
+        };
+
+        public static readonly Intrinsic[] X86PminuInstruction =
+        {
+            Intrinsic.X86Pminub,
+            Intrinsic.X86Pminuw,
+            Intrinsic.X86Pminud,
+        };
+
+        public static readonly Intrinsic[] X86PmovsxInstruction =
+        {
+            Intrinsic.X86Pmovsxbw,
+            Intrinsic.X86Pmovsxwd,
+            Intrinsic.X86Pmovsxdq,
+        };
+
+        public static readonly Intrinsic[] X86PmovzxInstruction =
+        {
+            Intrinsic.X86Pmovzxbw,
+            Intrinsic.X86Pmovzxwd,
+            Intrinsic.X86Pmovzxdq,
+        };
+
+        public static readonly Intrinsic[] X86PsllInstruction =
+        {
+            0, // No byte-sized entry; placeholder so word/dword/qword sit at indices 1-3.
+            Intrinsic.X86Psllw,
+            Intrinsic.X86Pslld,
+            Intrinsic.X86Psllq,
+        };
+
+        public static readonly Intrinsic[] X86PsraInstruction =
+        {
+            0, // No byte-sized entry; placeholder so word/dword sit at indices 1-2.
+            Intrinsic.X86Psraw,
+            Intrinsic.X86Psrad,
+        };
+
+        public static readonly Intrinsic[] X86PsrlInstruction =
+        {
+            0, // No byte-sized entry; placeholder so word/dword/qword sit at indices 1-3.
+            Intrinsic.X86Psrlw,
+            Intrinsic.X86Psrld,
+            Intrinsic.X86Psrlq,
+        };
+
+        public static readonly Intrinsic[] X86PsubInstruction =
+        {
+            Intrinsic.X86Psubb,
+            Intrinsic.X86Psubw,
+            Intrinsic.X86Psubd,
+            Intrinsic.X86Psubq,
+        };
+
+        public static readonly Intrinsic[] X86PunpckhInstruction =
+        {
+            Intrinsic.X86Punpckhbw,
+            Intrinsic.X86Punpckhwd,
+            Intrinsic.X86Punpckhdq,
+            Intrinsic.X86Punpckhqdq,
+        };
+
+        public static readonly Intrinsic[] X86PunpcklInstruction =
+        {
+            Intrinsic.X86Punpcklbw,
+            Intrinsic.X86Punpcklwd,
+            Intrinsic.X86Punpckldq,
+            Intrinsic.X86Punpcklqdq,
+        };
+#endregion
+
+        // Left-shift amount: the encoded immediate minus (8 << op.Size).
+        // NOTE(review): 8 << op.Size looks like the element width in bits - confirm against OpCodeSimdShImm.
+        public static int GetImmShl(OpCodeSimdShImm op)
+            => op.Imm - (8 << op.Size);
+
+        // Right-shift amount: (8 << (op.Size + 1)) minus the encoded immediate.
+        // NOTE(review): presumably twice the element width in bits minus Imm - confirm against OpCodeSimdShImm.
+        public static int GetImmShr(OpCodeSimdShImm op)
+            => (8 << (op.Size + 1)) - op.Imm;
+
+        // Float overload: reinterprets the value's raw bits as an int and
+        // forwards them to the int overload.
+        public static Operand X86GetScalar(ArmEmitterContext context, float value)
+            => X86GetScalar(context, BitConverter.SingleToInt32Bits(value));
+
+        // Double overload: reinterprets the value's raw bits as a long and
+        // forwards them to the long overload.
+        public static Operand X86GetScalar(ArmEmitterContext context, double value)
+            => X86GetScalar(context, BitConverter.DoubleToInt64Bits(value));
+
+        // Int overload: wraps the value in a constant operand and passes it
+        // to context.VectorCreateScalar.
+        public static Operand X86GetScalar(ArmEmitterContext context, int value)
+            => context.VectorCreateScalar(Const(value));
+
+        // Long overload: wraps the value in a constant operand and passes it
+        // to context.VectorCreateScalar.
+        public static Operand X86GetScalar(ArmEmitterContext context, long value)
+            => context.VectorCreateScalar(Const(value));
+
+        // Float overload: reinterprets the value's raw bits as an int and
+        // forwards them to the int overload.
+        public static Operand X86GetAllElements(ArmEmitterContext context, float value)
+            => X86GetAllElements(context, BitConverter.SingleToInt32Bits(value));
+
+        // Double overload: reinterprets the value's raw bits as a long and
+        // forwards them to the long overload.
+        public static Operand X86GetAllElements(ArmEmitterContext context, double value)
+            => X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value));
+
+        // Int overload: VectorCreateScalar on the constant, then an X86Shufps of
+        // that vector with itself using a selector of 0.
+        public static Operand X86GetAllElements(ArmEmitterContext context, int value)
+        {
+            Operand scalar = context.VectorCreateScalar(Const(value));
+
+            return context.AddIntrinsic(Intrinsic.X86Shufps, scalar, scalar, Const(0));
+        }
+
+        // Long overload: VectorCreateScalar on the constant, then an X86Movlhps of
+        // that vector with itself.
+        public static Operand X86GetAllElements(ArmEmitterContext context, long value)
+        {
+            Operand scalar = context.VectorCreateScalar(Const(value));
+
+            return context.AddIntrinsic(Intrinsic.X86Movlhps, scalar, scalar);
+        }
+
+        // Maps a rounding mode to 8 | mode (mode in 0..3); throws ArgumentException otherwise. NOTE(review): looks like the SSE4.1 ROUND* imm8 - confirm.
+        public static int X86GetRoundControl(FPRoundingMode roundMode)
+        {
+            switch (roundMode)
+            {
+                case FPRoundingMode.ToNearest:            return 8 | 0;
+                case FPRoundingMode.TowardsMinusInfinity: return 8 | 1;
+                case FPRoundingMode.TowardsPlusInfinity:  return 8 | 2;
+                case FPRoundingMode.TowardsZero:          return 8 | 3;
+                default: throw new ArgumentException($"Invalid rounding mode \"{roundMode}\".");
+            }
+        }
+
+        // Scalar FP unary op through an x86 intrinsic applied to the whole Rn vector.
+        // inst64 is used when bit 0 of the opcode Size is set, inst32 otherwise.
+        // The intrinsic result is passed through VectorZeroUpper64 (inst64 case) or
+        // VectorZeroUpper96 (inst32 case) and then copied to Rd.
+        public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+        {
+            OpCodeSimd opCode = (OpCodeSimd)context.CurrOp;
+
+            Operand source = GetVec(opCode.Rn);
+
+            bool useInst64 = (opCode.Size & 1) != 0;
+
+            Operand result = context.AddIntrinsic(useInst64 ? inst64 : inst32, source);
+
+            result = useInst64
+                ? context.VectorZeroUpper64(result)
+                : context.VectorZeroUpper96(result);
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+
+        // Scalar FP binary op through an x86 intrinsic applied to the Rn and Rm vectors.
+        // inst64 is used when bit 0 of the opcode Size is set, inst32 otherwise.
+        // The intrinsic result is passed through VectorZeroUpper64 (inst64 case) or
+        // VectorZeroUpper96 (inst32 case) and then copied to Rd.
+        public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+
+            Operand lhs = GetVec(opCode.Rn);
+            Operand rhs = GetVec(opCode.Rm);
+
+            bool useInst64 = (opCode.Size & 1) != 0;
+
+            Operand result = context.AddIntrinsic(useInst64 ? inst64 : inst32, lhs, rhs);
+
+            result = useInst64
+                ? context.VectorZeroUpper64(result)
+                : context.VectorZeroUpper96(result);
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+
+        // Vector FP unary op through an x86 intrinsic: inst32 when bit 0 of the opcode Size is clear, inst64 otherwise.
+        public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+        {
+            OpCodeSimd opCode = (OpCodeSimd)context.CurrOp;
+
+            Operand source = GetVec(opCode.Rn);
+            Intrinsic chosen = (opCode.Size & 1) == 0 ? inst32 : inst64;
+            Operand result = context.AddIntrinsic(chosen, source);
+
+            // Only the Simd64 register size gets the extra VectorZeroUpper64 step.
+            if (RegisterSize.Simd64 == opCode.RegisterSize)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Vector FP binary op through an x86 intrinsic: inst32 when bit 0 of the opcode Size is clear, inst64 otherwise.
+        public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+
+            Operand lhs = GetVec(opCode.Rn);
+            Operand rhs = GetVec(opCode.Rm);
+            Intrinsic chosen = (opCode.Size & 1) == 0 ? inst32 : inst64;
+            Operand result = context.AddIntrinsic(chosen, lhs, rhs);
+
+            // Only the Simd64 register size gets the extra VectorZeroUpper64 step.
+            if (RegisterSize.Simd64 == opCode.RegisterSize)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Emits a call to f64 when bit 0 of the current opcode's Size is set, to f32 otherwise, passing n.
+        public static Operand EmitUnaryMathCall(ArmEmitterContext context, _F32_F32 f32, _F64_F64 f64, Operand n)
+        {
+            IOpCodeSimd opCode = (IOpCodeSimd)context.CurrOp;
+            return (opCode.Size & 1) != 0 ? context.Call(f64, n) : context.Call(f32, n);
+        }
+
+        // Emits a call to MathF.Round (bit 0 of the opcode Size clear) or
+        // Math.Round (bit 0 set), passing n and roundMode; roundMode is passed
+        // as an int constant operand.
+        public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
+        {
+            IOpCodeSimd opCode = (IOpCodeSimd)context.CurrOp;
+            bool isSingle = (opCode.Size & 1) == 0;
+
+            // The casts give both branches the common Delegate type.
+            Delegate rounder = isSingle
+                ? (Delegate)new _F32_F32_MidpointRounding(MathF.Round)
+                : (Delegate)new _F64_F64_MidpointRounding(Math.Round);
+
+            Operand mode = Const((int)roundMode);
+
+            return context.Call(rounder, n, mode);
+        }
+
+        // One-argument form: calls f32 when bit 0 of the opcode Size is clear, f64 otherwise.
+        public static Operand EmitSoftFloatCall(
+            ArmEmitterContext context,
+            _F32_F32 f32,
+            _F64_F64 f64,
+            params Operand[] callArgs)
+        {
+            IOpCodeSimd opCode = (IOpCodeSimd)context.CurrOp;
+            bool isSingle = (opCode.Size & 1) == 0;
+
+            return context.Call(isSingle ? (Delegate)f32 : (Delegate)f64, callArgs);
+        }
+
+        // Two-argument form: calls f32 when bit 0 of the opcode Size is clear, f64 otherwise.
+        public static Operand EmitSoftFloatCall(
+            ArmEmitterContext context,
+            _F32_F32_F32 f32,
+            _F64_F64_F64 f64,
+            params Operand[] callArgs)
+        {
+            IOpCodeSimd opCode = (IOpCodeSimd)context.CurrOp;
+            bool isSingle = (opCode.Size & 1) == 0;
+
+            return context.Call(isSingle ? (Delegate)f32 : (Delegate)f64, callArgs);
+        }
+
+        // Three-argument form: calls f32 when bit 0 of the opcode Size is clear, f64 otherwise.
+        public static Operand EmitSoftFloatCall(
+            ArmEmitterContext context,
+            _F32_F32_F32_F32 f32,
+            _F64_F64_F64_F64 f64,
+            params Operand[] callArgs)
+        {
+            IOpCodeSimd opCode = (IOpCodeSimd)context.CurrOp;
+            bool isSingle = (opCode.Size & 1) == 0;
+
+            return context.Call(isSingle ? (Delegate)f32 : (Delegate)f64, callArgs);
+        }
+
+        // Scalar FP op on Rn[0] and Rm[op.Index]; emit's result is inserted at element 0 of a zero vector and copied to Rd.
+        public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
+        {
+            OpCodeSimdRegElemF opCode = (OpCodeSimdRegElemF)context.CurrOp;
+            OperandType elemType = (opCode.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;
+
+            Operand lhs = context.VectorExtract(elemType, GetVec(opCode.Rn), 0);
+            Operand rhs = context.VectorExtract(elemType, GetVec(opCode.Rm), opCode.Index);
+
+            context.Copy(GetVec(opCode.Rd), context.VectorInsert(context.VectorZero(), emit(lhs, rhs), 0));
+        }
+
+        // Scalar FP op on Rd[0], Rn[0] and Rm[op.Index]; emit's result is inserted at element 0 of a zero vector and copied to Rd.
+        public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
+        {
+            OpCodeSimdRegElemF opCode = (OpCodeSimdRegElemF)context.CurrOp;
+            OperandType elemType = (opCode.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;
+
+            Operand acc = context.VectorExtract(elemType, GetVec(opCode.Rd), 0);
+            Operand lhs = context.VectorExtract(elemType, GetVec(opCode.Rn), 0);
+            Operand rhs = context.VectorExtract(elemType, GetVec(opCode.Rm), opCode.Index);
+
+            context.Copy(GetVec(opCode.Rd), context.VectorInsert(context.VectorZero(), emit(acc, lhs, rhs), 0));
+        }
+
+        // Scalar integer unary op: element 0 of Rn (read with EmitVectorExtractSx) goes through emit; the result is inserted into a zero vector and copied to Rd.
+        public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit)
+        {
+            OpCodeSimd opCode = (OpCodeSimd)context.CurrOp;
+
+            Operand source = EmitVectorExtractSx(context, opCode.Rn, 0, opCode.Size);
+            Operand result = EmitVectorInsert(context, context.VectorZero(), emit(source), 0, opCode.Size);
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Scalar integer binary op: element 0 of Rn and Rm (read with EmitVectorExtractSx) go through emit; the result is inserted into a zero vector and copied to Rd.
+        public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+
+            Operand lhs = EmitVectorExtractSx(context, opCode.Rn, 0, opCode.Size);
+            Operand rhs = EmitVectorExtractSx(context, opCode.Rm, 0, opCode.Size);
+            Operand result = EmitVectorInsert(context, context.VectorZero(), emit(lhs, rhs), 0, opCode.Size);
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Scalar integer unary op: element 0 of Rn (read with EmitVectorExtractZx) goes through emit; the result is inserted into a zero vector and copied to Rd.
+        public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit)
+        {
+            OpCodeSimd opCode = (OpCodeSimd)context.CurrOp;
+
+            Operand source = EmitVectorExtractZx(context, opCode.Rn, 0, opCode.Size);
+            Operand result = EmitVectorInsert(context, context.VectorZero(), emit(source), 0, opCode.Size);
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Scalar integer binary op: element 0 of Rn and Rm (read with EmitVectorExtractZx) go through emit; the result is inserted into a zero vector and copied to Rd.
+        public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+
+            Operand lhs = EmitVectorExtractZx(context, opCode.Rn, 0, opCode.Size);
+            Operand rhs = EmitVectorExtractZx(context, opCode.Rm, 0, opCode.Size);
+            Operand result = EmitVectorInsert(context, context.VectorZero(), emit(lhs, rhs), 0, opCode.Size);
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Scalar integer ternary op: element 0 of Rd, Rn and Rm (read with EmitVectorExtractZx) go through emit; the result is inserted into a zero vector and copied to Rd.
+        public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+
+            Operand acc = EmitVectorExtractZx(context, opCode.Rd, 0, opCode.Size);
+            Operand lhs = EmitVectorExtractZx(context, opCode.Rn, 0, opCode.Size);
+            Operand rhs = EmitVectorExtractZx(context, opCode.Rm, 0, opCode.Size);
+            Operand result = EmitVectorInsert(context, context.VectorZero(), emit(acc, lhs, rhs), 0, opCode.Size);
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Scalar FP unary op via a delegate: Rn[0] goes through emit; the result is inserted at element 0 of a zero vector and copied to Rd.
+        public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit)
+        {
+            OpCodeSimd opCode = (OpCodeSimd)context.CurrOp;
+            OperandType elemType = (opCode.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;
+
+            Operand source = context.VectorExtract(elemType, GetVec(opCode.Rn), 0);
+
+            context.Copy(GetVec(opCode.Rd), context.VectorInsert(context.VectorZero(), emit(source), 0));
+        }
+
+        // Scalar FP binary op via a delegate: Rn[0] and Rm[0] go through emit; the result is inserted at element 0 of a zero vector and copied to Rd.
+        public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+            OperandType elemType = (opCode.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;
+
+            Operand lhs = context.VectorExtract(elemType, GetVec(opCode.Rn), 0);
+            Operand rhs = context.VectorExtract(elemType, GetVec(opCode.Rm), 0);
+
+            context.Copy(GetVec(opCode.Rd), context.VectorInsert(context.VectorZero(), emit(lhs, rhs), 0));
+        }
+
+        // Scalar FP ternary op via a delegate: Ra[0], Rn[0] and Rm[0] go through emit; the result is inserted at element 0 of a zero vector and copied to Rd.
+        public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+            OperandType elemType = (opCode.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;
+
+            Operand addend = context.VectorExtract(elemType, GetVec(opCode.Ra), 0);
+            Operand lhs    = context.VectorExtract(elemType, GetVec(opCode.Rn), 0);
+            Operand rhs    = context.VectorExtract(elemType, GetVec(opCode.Rm), 0);
+
+            context.Copy(GetVec(opCode.Rd), context.VectorInsert(context.VectorZero(), emit(addend, lhs, rhs), 0));
+        }
+
+        // Vector FP unary op via a delegate: every element of Rn goes through emit and is inserted at the same index of the result copied to Rd.
+        public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit)
+        {
+            OpCodeSimd opCode = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int sizeF = opCode.Size & 1;
+            OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+            // Byte count shifted right by 2 (FP32) or 3 (FP64) gives the number of elements.
+            int count = opCode.GetBytesCount() >> (sizeF + 2);
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand element = context.VectorExtract(elemType, GetVec(opCode.Rn), i);
+                result = context.VectorInsert(result, emit(element), i);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Vector FP binary op via a delegate: matching elements of Rn and Rm go through emit and are inserted at the same index of the result copied to Rd.
+        public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int sizeF = opCode.Size & 1;
+            OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+            // Byte count shifted right by 2 (FP32) or 3 (FP64) gives the number of elements.
+            int count = opCode.GetBytesCount() >> (sizeF + 2);
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand lhs = context.VectorExtract(elemType, GetVec(opCode.Rn), i);
+                Operand rhs = context.VectorExtract(elemType, GetVec(opCode.Rm), i);
+                result = context.VectorInsert(result, emit(lhs, rhs), i);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Vector FP ternary op via a delegate: matching elements of Rd, Rn and Rm go through emit and are inserted at the same index of the result copied to Rd.
+        public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int sizeF = opCode.Size & 1;
+            OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+            // Byte count shifted right by 2 (FP32) or 3 (FP64) gives the number of elements.
+            int count = opCode.GetBytesCount() >> (sizeF + 2);
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand acc = context.VectorExtract(elemType, GetVec(opCode.Rd), i);
+                Operand lhs = context.VectorExtract(elemType, GetVec(opCode.Rn), i);
+                Operand rhs = context.VectorExtract(elemType, GetVec(opCode.Rm), i);
+                result = context.VectorInsert(result, emit(acc, lhs, rhs), i);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Vector FP op by element: each element of Rn is combined with Rm[op.Index] through emit and inserted at the same index of the result copied to Rd.
+        public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
+        {
+            OpCodeSimdRegElemF opCode = (OpCodeSimdRegElemF)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int sizeF = opCode.Size & 1;
+            OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+            // Byte count shifted right by 2 (FP32) or 3 (FP64) gives the number of elements.
+            int count = opCode.GetBytesCount() >> (sizeF + 2);
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand lhs = context.VectorExtract(elemType, GetVec(opCode.Rn), i);
+                Operand rhs = context.VectorExtract(elemType, GetVec(opCode.Rm), opCode.Index);
+                result = context.VectorInsert(result, emit(lhs, rhs), i);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Vector FP ternary op by element: each element of Rd and Rn is combined with Rm[op.Index] through emit and inserted at the same index of the result copied to Rd.
+        public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
+        {
+            OpCodeSimdRegElemF opCode = (OpCodeSimdRegElemF)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int sizeF = opCode.Size & 1;
+            OperandType elemType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+            // Byte count shifted right by 2 (FP32) or 3 (FP64) gives the number of elements.
+            int count = opCode.GetBytesCount() >> (sizeF + 2);
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand acc = context.VectorExtract(elemType, GetVec(opCode.Rd), i);
+                Operand lhs = context.VectorExtract(elemType, GetVec(opCode.Rn), i);
+                Operand rhs = context.VectorExtract(elemType, GetVec(opCode.Rm), opCode.Index);
+                result = context.VectorInsert(result, emit(acc, lhs, rhs), i);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Vector integer unary op: each element of Rn (read with EmitVectorExtractSx) goes through emit and is inserted at the same index of the result copied to Rd.
+        public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit)
+        {
+            OpCodeSimd opCode = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+            int count = opCode.GetBytesCount() >> opCode.Size;
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand element = EmitVectorExtractSx(context, opCode.Rn, i, opCode.Size);
+
+                result = EmitVectorInsert(context, result, emit(element), i, opCode.Size);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Vector integer binary op: matching elements of Rn and Rm (read with EmitVectorExtractSx) go through emit and are inserted at the same index of the result copied to Rd.
+        public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+
+            Operand result = context.VectorZero();
+            int count = opCode.GetBytesCount() >> opCode.Size;
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand lhs = EmitVectorExtractSx(context, opCode.Rn, i, opCode.Size);
+                Operand rhs = EmitVectorExtractSx(context, opCode.Rm, i, opCode.Size);
+
+                result = EmitVectorInsert(context, result, emit(lhs, rhs), i, opCode.Size);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Vector integer ternary op: matching elements of Rd, Rn and Rm (read with EmitVectorExtractSx) go through emit and are inserted at the same index of the result copied to Rd.
+        public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+
+            Operand result = context.VectorZero();
+            int count = opCode.GetBytesCount() >> opCode.Size;
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand acc = EmitVectorExtractSx(context, opCode.Rd, i, opCode.Size);
+                Operand lhs = EmitVectorExtractSx(context, opCode.Rn, i, opCode.Size);
+                Operand rhs = EmitVectorExtractSx(context, opCode.Rm, i, opCode.Size);
+
+                result = EmitVectorInsert(context, result, emit(acc, lhs, rhs), i, opCode.Size);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Vector integer unary op: each element of Rn (read with EmitVectorExtractZx) goes through emit and is inserted at the same index of the result copied to Rd.
+        public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit)
+        {
+            OpCodeSimd opCode = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+            int count = opCode.GetBytesCount() >> opCode.Size;
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand element = EmitVectorExtractZx(context, opCode.Rn, i, opCode.Size);
+
+                result = EmitVectorInsert(context, result, emit(element), i, opCode.Size);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Vector integer binary op: matching elements of Rn and Rm (read with EmitVectorExtractZx) go through emit and are inserted at the same index of the result copied to Rd.
+        public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+
+            Operand result = context.VectorZero();
+            int count = opCode.GetBytesCount() >> opCode.Size;
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand lhs = EmitVectorExtractZx(context, opCode.Rn, i, opCode.Size);
+                Operand rhs = EmitVectorExtractZx(context, opCode.Rm, i, opCode.Size);
+
+                result = EmitVectorInsert(context, result, emit(lhs, rhs), i, opCode.Size);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Vector integer ternary op: matching elements of Rd, Rn and Rm (read with EmitVectorExtractZx) go through emit and are inserted at the same index of the result copied to Rd.
+        public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+
+            Operand result = context.VectorZero();
+            int count = opCode.GetBytesCount() >> opCode.Size;
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand acc = EmitVectorExtractZx(context, opCode.Rd, i, opCode.Size);
+                Operand lhs = EmitVectorExtractZx(context, opCode.Rn, i, opCode.Size);
+                Operand rhs = EmitVectorExtractZx(context, opCode.Rm, i, opCode.Size);
+
+                result = EmitVectorInsert(context, result, emit(acc, lhs, rhs), i, opCode.Size);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Vector integer op by element: Rm[op.Index] is read once (EmitVectorExtractSx) and combined with each element of Rn through emit; the result is copied to Rd.
+        public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
+        {
+            OpCodeSimdRegElem opCode = (OpCodeSimdRegElem)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            Operand selected = EmitVectorExtractSx(context, opCode.Rm, opCode.Index, opCode.Size);
+            int count = opCode.GetBytesCount() >> opCode.Size;
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand element = EmitVectorExtractSx(context, opCode.Rn, i, opCode.Size);
+
+                result = EmitVectorInsert(context, result, emit(element, selected), i, opCode.Size);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Vector integer op by element: Rm[op.Index] is read once (EmitVectorExtractZx) and combined with each element of Rn through emit; the result is copied to Rd.
+        public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
+        {
+            OpCodeSimdRegElem opCode = (OpCodeSimdRegElem)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            Operand selected = EmitVectorExtractZx(context, opCode.Rm, opCode.Index, opCode.Size);
+            int count = opCode.GetBytesCount() >> opCode.Size;
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand element = EmitVectorExtractZx(context, opCode.Rn, i, opCode.Size);
+
+                result = EmitVectorInsert(context, result, emit(element, selected), i, opCode.Size);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Vector integer ternary op by element: Rm[op.Index] is read once (EmitVectorExtractZx) and combined with each element of Rd and Rn through emit; the result is copied to Rd.
+        public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
+        {
+            OpCodeSimdRegElem opCode = (OpCodeSimdRegElem)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            Operand selected = EmitVectorExtractZx(context, opCode.Rm, opCode.Index, opCode.Size);
+            int count = opCode.GetBytesCount() >> opCode.Size;
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand acc     = EmitVectorExtractZx(context, opCode.Rd, i, opCode.Size);
+                Operand element = EmitVectorExtractZx(context, opCode.Rn, i, opCode.Size);
+
+                result = EmitVectorInsert(context, result, emit(acc, element, selected), i, opCode.Size);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Fills every element of the result with emit(imm), where imm is the opcode's
+        // Immediate as a constant operand; emit is invoked once per element. The result is copied to Rd.
+        public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit)
+        {
+            OpCodeSimdImm opCode = (OpCodeSimdImm)context.CurrOp;
+
+            Operand immediate = Const(opCode.Immediate);
+            Operand result = context.VectorZero();
+            int count = opCode.GetBytesCount() >> opCode.Size;
+
+            for (int i = 0; i < count; i++)
+            {
+                result = EmitVectorInsert(context, result, emit(immediate), i, opCode.Size);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Combines each element of Rd (read with EmitVectorExtractZx) with the opcode's
+        // Immediate constant through emit, and copies the assembled vector back to Rd.
+        public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit)
+        {
+            OpCodeSimdImm opCode = (OpCodeSimdImm)context.CurrOp;
+
+            Operand immediate = Const(opCode.Immediate);
+            Operand result = context.VectorZero();
+            int count = opCode.GetBytesCount() >> opCode.Size;
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand current = EmitVectorExtractZx(context, opCode.Rd, i, opCode.Size);
+
+                result = EmitVectorInsert(context, result, emit(current, immediate), i, opCode.Size);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Signed entry point: forwards to EmitVectorWidenRmBinaryOp with signed: true.
+        public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
+            => EmitVectorWidenRmBinaryOp(context, emit, signed: true);
+
+        // Unsigned entry point: forwards to EmitVectorWidenRmBinaryOp with signed: false.
+        public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
+            => EmitVectorWidenRmBinaryOp(context, emit, signed: false);
+
+        // Combines elements of Rn read at size op.Size + 1 with elements of Rm read at size op.Size,
+        // inserting each emit result at size op.Size + 1; the assembled vector is copied to Rd.
+        private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int count = 8 >> opCode.Size;
+            // For Simd128 the Rm elements are read starting at index `count`, otherwise at 0.
+            int rmBase = opCode.RegisterSize == RegisterSize.Simd128 ? count : 0;
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand wide   = EmitVectorExtract(context, opCode.Rn, i, opCode.Size + 1, signed);
+                Operand narrow = EmitVectorExtract(context, opCode.Rm, rmBase + i, opCode.Size, signed);
+
+                result = EmitVectorInsert(context, result, emit(wide, narrow), i, opCode.Size + 1);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Signed entry point: forwards to EmitVectorWidenRnRmBinaryOp with signed: true.
+        public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
+            => EmitVectorWidenRnRmBinaryOp(context, emit, signed: true);
+
+        // Unsigned entry point: forwards to EmitVectorWidenRnRmBinaryOp with signed: false.
+        public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
+            => EmitVectorWidenRnRmBinaryOp(context, emit, signed: false);
+
+        // Combines elements of Rn and Rm, both read at size op.Size, inserting each emit
+        // result at size op.Size + 1; the assembled vector is copied to Rd.
+        private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int count = 8 >> opCode.Size;
+            // For Simd128 the source elements are read starting at index `count`, otherwise at 0.
+            int srcBase = opCode.RegisterSize == RegisterSize.Simd128 ? count : 0;
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand lhs = EmitVectorExtract(context, opCode.Rn, srcBase + i, opCode.Size, signed);
+                Operand rhs = EmitVectorExtract(context, opCode.Rm, srcBase + i, opCode.Size, signed);
+
+                result = EmitVectorInsert(context, result, emit(lhs, rhs), i, opCode.Size + 1);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Signed entry point: forwards to EmitVectorWidenRnRmTernaryOp with signed: true.
+        public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit)
+            => EmitVectorWidenRnRmTernaryOp(context, emit, signed: true);
+
+        // Unsigned entry point: forwards to EmitVectorWidenRnRmTernaryOp with signed: false.
+        public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit)
+            => EmitVectorWidenRnRmTernaryOp(context, emit, signed: false);
+
+        // Combines elements of Rd read at size op.Size + 1 with elements of Rn and Rm read at size
+        // op.Size, inserting each emit result at size op.Size + 1; the assembled vector is copied to Rd.
+        private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed)
+        {
+            OpCodeSimdReg opCode = (OpCodeSimdReg)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int count = 8 >> opCode.Size;
+            // For Simd128 the Rn/Rm elements are read starting at index `count`, otherwise at 0.
+            int srcBase = opCode.RegisterSize == RegisterSize.Simd128 ? count : 0;
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand acc = EmitVectorExtract(context, opCode.Rd, i, opCode.Size + 1, signed);
+                Operand lhs = EmitVectorExtract(context, opCode.Rn, srcBase + i, opCode.Size, signed);
+                Operand rhs = EmitVectorExtract(context, opCode.Rm, srcBase + i, opCode.Size, signed);
+
+                result = EmitVectorInsert(context, result, emit(acc, lhs, rhs), i, opCode.Size + 1);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Signed entry point: forwards to EmitVectorWidenBinaryOpByElem with signed: true.
+        public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
+            => EmitVectorWidenBinaryOpByElem(context, emit, signed: true);
+
+        // Unsigned entry point: forwards to EmitVectorWidenBinaryOpByElem with signed: false.
+        public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
+            => EmitVectorWidenBinaryOpByElem(context, emit, signed: false);
+
+        // Combines elements of Rn (size op.Size) with the single element Rm[op.Index], inserting
+        // each emit result at size op.Size + 1; the assembled vector is copied to Rd.
+        private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed)
+        {
+            OpCodeSimdRegElem opCode = (OpCodeSimdRegElem)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            Operand selected = EmitVectorExtract(context, opCode.Rm, opCode.Index, opCode.Size, signed);
+
+            int count = 8 >> opCode.Size;
+            // For Simd128 the Rn elements are read starting at index `count`, otherwise at 0.
+            int rnBase = opCode.RegisterSize == RegisterSize.Simd128 ? count : 0;
+
+            for (int i = 0; i < count; i++)
+            {
+                Operand element = EmitVectorExtract(context, opCode.Rn, rnBase + i, opCode.Size, signed);
+
+                result = EmitVectorInsert(context, result, emit(element, selected), i, opCode.Size + 1);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Sign-extending flavour of the widening by-element ternary emitter: forwards with
+        // signed: true so the shared implementation extracts elements as signed.
+        public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit)
+            => EmitVectorWidenTernaryOpByElem(context, emit, signed: true);
+
+        // Zero-extending flavour of the widening by-element ternary emitter: forwards with
+        // signed: false so the shared implementation extracts elements as unsigned.
+        public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
+            => EmitVectorWidenTernaryOpByElem(context, emit, signed: false);
+
+        // Widening ternary op by element: Rd[i] = emit(Rd[i], Rn[part + i], Rm[op.Index]). Rd
+        // elements are one size step wider (op.Size + 1) than the Rn/Rm elements (op.Size).
+        private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed)
+        {
+            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);
+
+            int elems = 8 >> op.Size;
+            int part  = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand de = EmitVectorExtract(context, op.Rd,        index, op.Size + 1, signed);
+                Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size,     signed);
+                res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        // Sign-extending flavour of the integer pairwise emitter: forwards with
+        // signed: true so the shared implementation extracts elements as signed.
+        public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit)
+            => EmitVectorPairwiseOp(context, emit, signed: true);
+
+        // Zero-extending flavour of the integer pairwise emitter: forwards with
+        // signed: false so the shared implementation extracts elements as unsigned.
+        public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit)
+            => EmitVectorPairwiseOp(context, emit, signed: false);
+
+        // Integer pairwise op: emit is applied to each adjacent pair of Rn elements (stored at
+        // result indices 0..pairs-1) and of Rm elements (stored at pairs..2*pairs-1).
+        private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand result = context.VectorZero();
+            int pairCount = op.GetPairsCount() >> op.Size;
+
+            for (int pair = 0; pair < pairCount; pair++)
+            {
+                int first = pair * 2;
+
+                Operand n0 = EmitVectorExtract(context, op.Rn, first, op.Size, signed);
+                Operand n1 = EmitVectorExtract(context, op.Rn, first + 1, op.Size, signed);
+                Operand m0 = EmitVectorExtract(context, op.Rm, first, op.Size, signed);
+                Operand m1 = EmitVectorExtract(context, op.Rm, first + 1, op.Size, signed);
+
+                result = EmitVectorInsert(context, result, emit(n0, n1), pair, op.Size);
+                result = EmitVectorInsert(context, result, emit(m0, m1), pairCount + pair, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Signed across-vector reduction that keeps the source element size
+        // (forwards signed: true, isLong: false).
+        public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
+            => EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false);
+
+        // Unsigned across-vector reduction that keeps the source element size
+        // (forwards signed: false, isLong: false).
+        public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
+            => EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false);
+
+        // Signed across-vector reduction whose result is inserted one size step wider
+        // (forwards signed: true, isLong: true).
+        public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
+            => EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true);
+
+        // Unsigned across-vector reduction whose result is inserted one size step wider
+        // (forwards signed: false, isLong: true).
+        public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
+            => EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true);
+
+        // Across-vector reduction: folds all elements of Rn through emit, in index order, and
+        // writes the scalar to element 0 of an otherwise zeroed Rd. With isLong the result is
+        // inserted one size step wider (op.Size + 1) than the source elements.
+        private static void EmitVectorAcrossVectorOp(ArmEmitterContext context, Func2I emit, bool signed, bool isLong)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+            int count = op.GetBytesCount() >> op.Size;
+
+            // Element 0 seeds the accumulator; the loop folds in elements 1..count-1.
+            Operand acc = EmitVectorExtract(context, op.Rn, 0, op.Size, signed);
+
+            for (int i = 1; i < count; i++)
+            {
+                acc = emit(acc, EmitVectorExtract(context, op.Rn, i, op.Size, signed));
+            }
+
+            int dstSize = op.Size;
+            if (isLong)
+            {
+                dstSize++;
+            }
+
+            Operand d = EmitVectorInsert(context, context.VectorZero(), acc, 0, dstSize);
+            context.Copy(GetVec(op.Rd), d);
+        }
+
+        // FP pairwise op, element by element: emit is applied to each adjacent pair of Rn
+        // (results 0..pairs-1) and of Rm (results pairs..2*pairs-1).
+        public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            // Bit 0 of op.Size selects the element type: clear -> FP32, set -> FP64.
+            int sizeF = op.Size & 1;
+            OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
+
+            int pairs = op.GetPairsCount() >> (sizeF + 2);
+
+            for (int pair = 0; pair < pairs; pair++)
+            {
+                int first = pair * 2;
+
+                Operand n0 = context.VectorExtract(type, GetVec(op.Rn), first);
+                Operand n1 = context.VectorExtract(type, GetVec(op.Rn), first + 1);
+                Operand m0 = context.VectorExtract(type, GetVec(op.Rm), first);
+                Operand m1 = context.VectorExtract(type, GetVec(op.Rm), first + 1);
+
+                result = context.VectorInsert(result, emit(n0, n1), pair);
+                result = context.VectorInsert(result, emit(m0, m1), pairs + pair);
+            }
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // SSE pairwise FP op: builds two operands from Rn and Rm with unpack/shuffle intrinsics
+        // and combines them with inst32 (op.Size bit 0 clear) or inst64 (bit 0 set). The
+        // intent appears to be (first, second) members of each adjacent pair - TODO confirm.
+        public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            bool isDouble = (op.Size & 1) != 0;
+
+            Operand part0, part1;
+
+            if (isDouble)
+            {
+                part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, n, m);
+                part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, n, m);
+            }
+            else if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m);
+                Operand zero = context.VectorZero();
+
+                part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero);
+                part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck);
+            }
+            else /* if (op.RegisterSize == RegisterSize.Simd128) */
+            {
+                // Two-bit selector fields: {0, 2, 0, 2} for part0 and {1, 3, 1, 3} for part1.
+                const int evenSel = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0;
+                const int oddSel  = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0;
+
+                part0 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(evenSel));
+                part1 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(oddSel));
+            }
+
+            Intrinsic inst = isDouble ? inst64 : inst32;
+
+            context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, part0, part1));
+        }
+
+
+        [Flags]
+        public enum SaturatingFlags
+        {
+            // Operand shape and signedness.
+            Scalar = 0x01,
+            Signed = 0x02,
+            // Operation selectors tested by EmitSaturatingBinaryOp.
+            Add = 0x04,
+            Sub = 0x08,
+            Accumulate = 0x10,
+
+            // Shape/signedness presets used by the Scalar*/Vector* entry points.
+            ScalarSx = Scalar | Signed,
+            ScalarZx = Scalar,
+            VectorSx = Signed,
+            VectorZx = 0x00
+        }
+
+        // Scalar form of the signed saturating unary op: forwards SaturatingFlags.ScalarSx,
+        // which makes the shared implementation process element 0 only.
+        public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
+            => EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.ScalarSx);
+
+        // Vector form of the signed saturating unary op: forwards SaturatingFlags.VectorSx,
+        // which makes the shared implementation process every element.
+        public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
+            => EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.VectorSx);
+
+        // Signed saturating unary op: emit is applied to each sign-extended Rn element and the
+        // result is passed through a saturation helper before being inserted into Rd. With
+        // the Scalar flag only element 0 is processed; the other result elements stay zero.
+        private static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            int elems = 1;
+            if ((flags & SaturatingFlags.Scalar) == 0)
+            {
+                elems = op.GetBytesCount() >> op.Size;
+            }
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand value = emit(EmitVectorExtractSx(context, op.Rn, index, op.Size));
+
+                // Sizes 0..2 go through EmitSatQ; size 3 uses the dedicated 64-bit helper.
+                Operand de = op.Size <= 2
+                    ? EmitSatQ(context, value, op.Size, signedSrc: true, signedDst: true)
+                    : EmitUnarySignedSatQAbsOrNeg(context, value);
+
+                res = EmitVectorInsert(context, res, de, index, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        // Scalar signed entry point. No emit delegate is supplied (null), so `flags` has to
+        // select Add, Sub or Accumulate for EmitSaturatingBinaryOp to have an operation.
+        public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, SaturatingFlags flags)
+            => EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarSx | flags);
+
+        // Scalar unsigned entry point. No emit delegate is supplied (null), so `flags` has to
+        // select Add, Sub or Accumulate for EmitSaturatingBinaryOp to have an operation.
+        public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
+            => EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarZx | flags);
+
+        // Vector signed entry point. No emit delegate is supplied (null), so `flags` has to
+        // select Add, Sub or Accumulate for EmitSaturatingBinaryOp to have an operation.
+        public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, SaturatingFlags flags)
+            => EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorSx | flags);
+
+        // Vector unsigned entry point. No emit delegate is supplied (null), so `flags` has to
+        // select Add, Sub or Accumulate for EmitSaturatingBinaryOp to have an operation.
+        public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
+            => EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorZx | flags);
+
+        // Saturating binary op driven by flags: Scalar limits processing to element 0, Signed
+        // picks the extension, and the mode is Add/Sub (Rn op Rm), Accumulate (Rn + Rd, with
+        // Rn extracted using the opposite signedness) or, if none is set, the emit delegate.
+        public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            bool signed     = (flags & SaturatingFlags.Signed)     != 0;
+            bool add        = (flags & SaturatingFlags.Add)        != 0;
+            bool sub        = (flags & SaturatingFlags.Sub)        != 0;
+            bool accumulate = (flags & SaturatingFlags.Accumulate) != 0;
+
+            int size  = op.Size;
+            int elems = (flags & SaturatingFlags.Scalar) != 0 ? 1 : op.GetBytesCount() >> size;
+
+            if (add || sub)
+            {
+                OpCodeSimdReg opReg = (OpCodeSimdReg)op;
+
+                for (int index = 0; index < elems; index++)
+                {
+                    Operand ne = EmitVectorExtract(context, opReg.Rn, index, size, signed);
+                    Operand me = EmitVectorExtract(context, opReg.Rm, index, size, signed);
+                    Operand de;
+
+                    if (size > 2)
+                    {
+                        // Size 3 uses the dedicated 64-bit add/sub helpers; Add wins over Sub.
+                        de = add
+                            ? EmitBinarySatQAdd(context, ne, me, signed)
+                            : EmitBinarySatQSub(context, ne, me, signed);
+                    }
+                    else
+                    {
+                        Operand temp = add ? context.Add(ne, me) : context.Subtract(ne, me);
+
+                        de = EmitSatQ(context, temp, size, signedSrc: true, signedDst: signed);
+                    }
+
+                    res = EmitVectorInsert(context, res, de, index, size);
+                }
+            }
+            else if (accumulate)
+            {
+                for (int index = 0; index < elems; index++)
+                {
+                    // Rn is extracted with the opposite signedness of the Rd accumulator.
+                    Operand ne = EmitVectorExtract(context, op.Rn, index, size, !signed);
+                    Operand me = EmitVectorExtract(context, op.Rd, index, size,  signed);
+                    Operand de;
+
+                    if (size > 2)
+                    {
+                        de = EmitBinarySatQAccumulate(context, ne, me, signed);
+                    }
+                    else
+                    {
+                        de = EmitSatQ(context, context.Add(ne, me), size, signedSrc: true, signedDst: signed);
+                    }
+
+                    res = EmitVectorInsert(context, res, de, index, size);
+                }
+            }
+            else
+            {
+                OpCodeSimdReg opReg = (OpCodeSimdReg)op;
+
+                for (int index = 0; index < elems; index++)
+                {
+                    Operand ne = EmitVectorExtract(context, opReg.Rn, index, size, signed);
+                    Operand me = EmitVectorExtract(context, opReg.Rm, index, size, signed);
+
+                    Operand temp = emit(ne, me);
+
+                    Operand de = EmitSatQ(context, temp, size, signedSrc: true, signedDst: signed);
+
+                    res = EmitVectorInsert(context, res, de, index, size);
+                }
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        [Flags]
+        public enum SaturatingNarrowFlags
+        {
+            Scalar    = 0x1,
+            SignedSrc = 0x2,
+            SignedDst = 0x4,
+
+            // Presets named <shape><source signedness><destination signedness>.
+            ScalarSxSx = Scalar | SignedSrc | SignedDst,
+            ScalarSxZx = Scalar | SignedSrc,
+            ScalarZxZx = Scalar,
+            VectorSxSx = SignedSrc | SignedDst,
+            VectorSxZx = SignedSrc,
+            VectorZxZx = 0x0
+        }
+
+        // Saturating narrow: every Rn element of size op.Size + 1 goes through EmitSatQ and is
+        // inserted into Rd at size op.Size. The non-scalar 128-bit form writes at element offset
+        // `elems` on top of the current Rd value; all other forms start from a zeroed vector.
+        public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            bool scalar    = (flags & SaturatingNarrowFlags.Scalar)    != 0;
+            bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0;
+            bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0;
+
+            int elems = scalar ? 1 : 8 >> op.Size;
+            int part  = !scalar && op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
+            Operand res = part != 0 ? context.Copy(GetVec(op.Rd)) : context.VectorZero();
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand ne  = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);
+                Operand sat = EmitSatQ(context, ne, op.Size, signedSrc, signedDst);
+
+                res = EmitVectorInsert(context, res, sat, part + index, op.Size);
+            }
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        // Saturating conversion from TSrc (16/32/64-bit, signed or unsigned) to the narrower TDst
+        // (8/16/32-bit, signed or unsigned; sizeDst must be 0..2), done by a SoftFallback call.
+        public static Operand EmitSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedSrc, bool signedDst)
+        {
+            if (sizeDst < 0 || sizeDst > 2)
+            {
+                throw new ArgumentOutOfRangeException(nameof(sizeDst));
+            }
+
+            Delegate dlg;
+            if (!signedSrc)
+            {
+                dlg = signedDst
+                    ? (Delegate)new _S64_U64_S32(SoftFallback.UnsignedSrcSignedDstSatQ)
+                    : (Delegate)new _U64_U64_S32(SoftFallback.UnsignedSrcUnsignedDstSatQ);
+            }
+            else
+            {
+                dlg = signedDst
+                    ? (Delegate)new _S64_S64_S32(SoftFallback.SignedSrcSignedDstSatQ)
+                    : (Delegate)new _U64_S64_S32(SoftFallback.SignedSrcUnsignedDstSatQ);
+            }
+
+            return context.Call(dlg, op, Const(sizeDst));
+        }
+
+        // 64-bit signed source and destination: calls SoftFallback.UnarySignedSatQAbsOrNeg.
+        public static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op)
+        {
+            Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");
+            _S64_S64 fallback = new _S64_S64(SoftFallback.UnarySignedSatQAbsOrNeg);
+            return context.Call(fallback, op);
+        }
+
+        // 64-bit saturating add; `signed` picks the signed or the unsigned SoftFallback routine.
+        public static Operand EmitBinarySatQAdd(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
+        {
+            Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");
+
+            if (signed)
+            {
+                return context.Call((Delegate)new _S64_S64_S64(SoftFallback.BinarySignedSatQAdd), op1, op2);
+            }
+            return context.Call((Delegate)new _U64_U64_U64(SoftFallback.BinaryUnsignedSatQAdd), op1, op2);
+        }
+
+        // 64-bit saturating subtract; `signed` picks the signed or the unsigned SoftFallback routine.
+        public static Operand EmitBinarySatQSub(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
+        {
+            Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");
+
+            if (signed)
+            {
+                return context.Call((Delegate)new _S64_S64_S64(SoftFallback.BinarySignedSatQSub), op1, op2);
+            }
+            return context.Call((Delegate)new _U64_U64_U64(SoftFallback.BinaryUnsignedSatQSub), op1, op2);
+        }
+
+        // 64-bit saturating accumulate; `signed` picks the signed or the unsigned SoftFallback routine.
+        public static Operand EmitBinarySatQAccumulate(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
+        {
+            Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");
+
+            if (signed)
+            {
+                return context.Call((Delegate)new _S64_U64_S64(SoftFallback.BinarySignedSatQAcc), op1, op2);
+            }
+            return context.Call((Delegate)new _U64_S64_U64(SoftFallback.BinaryUnsignedSatQAcc), op1, op2);
+        }
+
+        // Shorthand for EmitVectorExtract with signed = true (elements narrower than
+        // 64 bits are sign-extended).
+        public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size)
+            => EmitVectorExtract(context, reg, index, size, true);
+
+        // Shorthand for EmitVectorExtract with signed = false (elements narrower than
+        // 64 bits are zero-extended).
+        public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size)
+            => EmitVectorExtract(context, reg, index, size, false);
+
+        // Reads element `index` of vector register `reg`, where `size` 0..3 selects 8/16/32/64-bit
+        // elements, and returns it as an I64 operand. Elements narrower than 64 bits are
+        // sign-extended when `signed` is set and zero-extended otherwise.
+        public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed)
+        {
+            ThrowIfInvalid(index, size);
+
+            Operand vec = GetVec(reg);
+            Operand res;
+
+            // Each narrow case extracts the raw element first, then widens it to I64.
+            switch (size)
+            {
+                case 0:
+                    res = context.VectorExtract8(vec, index);
+
+                    res = signed
+                        ? context.SignExtend8(OperandType.I64, res)
+                        : context.ZeroExtend8(OperandType.I64, res);
+                    break;
+
+                case 1:
+                    res = context.VectorExtract16(vec, index);
+
+                    res = signed
+                        ? context.SignExtend16(OperandType.I64, res)
+                        : context.ZeroExtend16(OperandType.I64, res);
+                    break;
+
+                case 2:
+                    res = context.VectorExtract(OperandType.I32, vec, index);
+
+                    res = signed
+                        ? context.SignExtend32(OperandType.I64, res)
+                        : context.ZeroExtend32(OperandType.I64, res);
+                    break;
+
+                default:
+                    // Only size 3 can reach here (ThrowIfInvalid rejects everything else);
+                    // the element is extracted as I64, so no extension step is needed.
+                    res = context.VectorExtract(OperandType.I64, vec, index);
+                    break;
+            }
+
+            return res;
+        }
+
+        // Inserts `value` into element `index` (element size 0..3) of `vector` and returns the
+        // resulting vector operand. For sizes below 3 the value is first converted I64 -> I32.
+        public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size)
+        {
+            ThrowIfInvalid(index, size);
+
+            if (size == 3)
+            {
+                return context.VectorInsert(vector, value, index);
+            }
+
+            Operand narrow = context.ConvertI64ToI32(value);
+            switch (size)
+            {
+                case 0:  return context.VectorInsert8 (vector, narrow, index);
+                case 1:  return context.VectorInsert16(vector, narrow, index);
+                default: return context.VectorInsert  (vector, narrow, index);
+            }
+        }
+
+        // Rejects sizes outside 0..3 and indices outside 0..(16 >> size) - 1.
+        private static void ThrowIfInvalid(int index, int size)
+        {
+            if (size < 0 || size > 3)
+            {
+                throw new ArgumentOutOfRangeException(nameof(size));
+            }
+            if (index < 0 || index >= (16 >> size))
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/ARMeilleure/Instructions/InstEmitSimdLogical.cs
new file mode 100644
index 000000000..551752d24
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdLogical.cs
@@ -0,0 +1,456 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        // AND (vector): bitwise AND of Rn and Rm; a single PAND when SSE2 is enabled.
+        public static void And_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseAnd(op1, op2));
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+            Operand lhs = GetVec(op.Rn);
+            Operand rhs = GetVec(op.Rm);
+
+            Operand result = context.AddIntrinsic(Intrinsic.X86Pand, lhs, rhs);
+
+            // The 64-bit register form passes the result through VectorZeroUpper64.
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // BIC (vector): Rn AND NOT Rm. The fallback computes op1 & ~op2 per element; the SSE2
+        // path uses one PANDN with the operands passed as (Rm, Rn).
+        public static void Bic_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseAnd(op1, context.BitwiseNot(op2)));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand lhs = GetVec(op.Rn);
+            Operand rhs = GetVec(op.Rm);
+
+            // PANDN complements its first source operand, hence Rm comes first.
+            Operand result = context.AddIntrinsic(Intrinsic.X86Pandn, rhs, lhs);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // BIC (immediate form): computes op1 & ~op2 for the operands EmitVectorImmBinaryOp supplies.
+        public static void Bic_Vi(ArmEmitterContext context)
+        {
+            EmitVectorImmBinaryOp(
+                context,
+                (op1, op2) => context.BitwiseAnd(op1, context.BitwiseNot(op2)));
+        }
+
+        // BIF: bitwise insert if false - forwards to EmitBifBit with notRm: true, so Rn bits
+        // are inserted into Rd where the corresponding Rm bit is clear.
+        public static void Bif_V(ArmEmitterContext context)
+            => EmitBifBit(context, notRm: true);
+
+        // BIT: bitwise insert if true - forwards to EmitBifBit with notRm: false, so Rn bits
+        // are inserted into Rd where the corresponding Rm bit is set.
+        public static void Bit_V(ArmEmitterContext context)
+            => EmitBifBit(context, notRm: false);
+
+        // Shared BIF/BIT emitter. Both compute Rd ^ ((Rd ^ Rn) & mask): bits where the mask is
+        // set are taken from Rn, all others keep their Rd value. The mask is Rm for BIT and
+        // NOT Rm for BIF (notRm == true).
+        private static void EmitBifBit(ArmEmitterContext context, bool notRm)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSse2)
+            {
+                Operand acc = context.VectorZero();
+
+                // Fallback: handle the register as one (64-bit form) or two 64-bit elements.
+                int lanes = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;
+
+                for (int lane = 0; lane < lanes; lane++)
+                {
+                    Operand d = EmitVectorExtractZx(context, op.Rd, lane, 3);
+                    Operand n = EmitVectorExtractZx(context, op.Rn, lane, 3);
+                    Operand m = EmitVectorExtractZx(context, op.Rm, lane, 3);
+
+                    // BIF selects with the complemented Rm element.
+                    Operand mask = notRm ? context.BitwiseNot(m) : m;
+
+                    Operand e = context.BitwiseExclusiveOr(d, n);
+
+                    e = context.BitwiseAnd(e, mask);
+                    e = context.BitwiseExclusiveOr(e, d);
+
+                    acc = EmitVectorInsert(context, acc, e, lane, 3);
+                }
+
+                context.Copy(GetVec(op.Rd), acc);
+
+                return;
+            }
+
+            Operand vd = GetVec(op.Rd);
+            Operand vn = GetVec(op.Rn);
+            Operand vm = GetVec(op.Rm);
+
+            // res = Rn ^ Rd
+            Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, vn, vd);
+
+            // PANDN complements its first source operand, which gives (NOT Rm) AND res for BIF.
+            Intrinsic maskInst = notRm ? Intrinsic.X86Pandn : Intrinsic.X86Pand;
+
+            res = context.AddIntrinsic(maskInst, vm, res);
+
+            res = context.AddIntrinsic(Intrinsic.X86Pxor, vd, res);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                res = context.VectorZeroUpper64(res);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        // BSL (vector): bitwise select. The SSE2 path computes ((Rn ^ Rm) & Rd) ^ Rm, so a set
+        // Rd bit picks the Rn bit and a clear Rd bit picks the Rm bit.
+        public static void Bsl_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
+                {
+                    Operand diff = context.BitwiseExclusiveOr(op2, op3);
+                    return context.BitwiseExclusiveOr(context.BitwiseAnd(op1, diff), op3);
+                });
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+            Operand d = GetVec(op.Rd);
+            Operand n = GetVec(op.Rn);
+            Operand m = GetVec(op.Rm);
+
+            Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
+
+            res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
+            res = context.AddIntrinsic(Intrinsic.X86Pxor, res, m);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                res = context.VectorZeroUpper64(res);
+            }
+
+            context.Copy(GetVec(op.Rd), res);
+        }
+
+        // EOR (vector): bitwise exclusive OR of Rn and Rm; a single PXOR when SSE2 is enabled.
+        public static void Eor_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2));
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+            Operand lhs = GetVec(op.Rn);
+            Operand rhs = GetVec(op.Rm);
+
+            Operand result = context.AddIntrinsic(Intrinsic.X86Pxor, lhs, rhs);
+
+            // The 64-bit register form passes the result through VectorZeroUpper64.
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // NOT (vector): bitwise complement of Rn. The SSE2 path applies PANDN to Rn and a
+        // vector built by X86GetAllElements(-1L); the fallback complements each element.
+        public static void Not_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorUnaryOpZx(context, (op1) => context.BitwiseNot(op1));
+                return;
+            }
+
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand src = GetVec(op.Rn);
+            Operand allOnes = X86GetAllElements(context, -1L);
+
+            Operand result = context.AddIntrinsic(Intrinsic.X86Pandn, src, allOnes);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // ORN (vector): Rn OR NOT Rm. The SSE2 path forms NOT Rm with PANDN against a vector
+        // built by X86GetAllElements(-1L), then ORs the result with Rn.
+        public static void Orn_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseOr(op1, context.BitwiseNot(op2)));
+
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            Operand lhs = GetVec(op.Rn);
+            Operand rhs = GetVec(op.Rm);
+
+            Operand allOnes = X86GetAllElements(context, -1L);
+
+            Operand notRhs = context.AddIntrinsic(Intrinsic.X86Pandn, rhs, allOnes);
+
+            Operand result = context.AddIntrinsic(Intrinsic.X86Por, notRhs, lhs);
+
+            // The 64-bit register form passes the result through VectorZeroUpper64.
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // ORR (vector): bitwise OR of Rn and Rm; a single POR when SSE2 is enabled.
+        public static void Orr_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseOr(op1, op2));
+                return;
+            }
+
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+            Operand lhs = GetVec(op.Rn);
+            Operand rhs = GetVec(op.Rm);
+
+            Operand result = context.AddIntrinsic(Intrinsic.X86Por, lhs, rhs);
+
+            // The 64-bit register form passes the result through VectorZeroUpper64.
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // ORR (immediate form): bitwise OR of the operand pair that
+        // EmitVectorImmBinaryOp supplies to the callback.
+        public static void Orr_Vi(ArmEmitterContext context)
+            => EmitVectorImmBinaryOp(context, (op1, op2) => context.BitwiseOr(op1, op2));
+
+        // RBIT (vector): passes every byte of Rn through SoftFallback.ReverseBits8 and stores
+        // it at the same index in Rd (16 bytes for the 128-bit form, 8 otherwise).
+        public static void Rbit_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int byteCount = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;
+
+            for (int i = 0; i < byteCount; i++)
+            {
+                Operand src = context.ConvertI64ToI32(EmitVectorExtractZx(context, op.Rn, i, 0));
+
+                Operand rev = context.Call(new _U32_U32(SoftFallback.ReverseBits8), src);
+
+                rev = context.ZeroExtend32(OperandType.I64, rev);
+
+                result = EmitVectorInsert(context, result, rev, i, 0);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // REV16: with SSSE3, one PSHUFB whose control mask lists byte indices swapped within
+        // every 16-bit pair; otherwise EmitRev_V with containerSize 1.
+        public static void Rev16_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSsse3)
+            {
+                EmitRev_V(context, containerSize: 1);
+                return;
+            }
+
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            Operand src = GetVec(op.Rn);
+
+            // Low and high 64-bit halves of the shuffle control, one byte index per byte.
+            const long maskE0 = 06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0;
+            const long maskE1 = 14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0;
+
+            Operand mask = EmitVectorInsert(context, X86GetScalar(context, maskE0), Const(maskE1), 1, 3);
+
+            Operand result = context.AddIntrinsic(Intrinsic.X86Pshufb, src, mask);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        /// <summary>
+        /// Reverses the element order inside every 32-bit group of Rn (bytes when op.Size is 0,
+        /// 16-bit halves otherwise). With SSSE3 this is a single PSHUFB driven by a constant
+        /// byte-index mask; otherwise <c>EmitRev_V</c> is used.
+        /// </summary>
+        public static void Rev32_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSsse3)
+            {
+                EmitRev_V(context, containerSize: 2);
+
+                return;
+            }
+
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            // Byte elements: the four bytes of each 32-bit group are fully reversed.
+            const long bytesLow  = 04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0;
+            const long bytesHigh = 12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0;
+
+            // 16-bit elements: the two halves of each 32-bit group trade places.
+            const long halvesLow  = 05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0;
+            const long halvesHigh = 13L << 56 | 12L << 48 | 15L << 40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0;
+
+            // Any non-zero size selects the 16-bit variant.
+            bool byteElems = simdOp.Size == 0;
+
+            long maskLow  = byteElems ? bytesLow  : halvesLow;
+            long maskHigh = byteElems ? bytesHigh : halvesHigh;
+
+            // The low mask goes into 64-bit element 0 of the control vector, the high mask into element 1.
+            Operand source  = GetVec(simdOp.Rn);
+            Operand shuffle = EmitVectorInsert(context, X86GetScalar(context, maskLow), Const(maskHigh), 1, 3);
+            Operand result  = context.AddIntrinsic(Intrinsic.X86Pshufb, source, shuffle);
+
+            // For the Simd64 register size the upper 64 bits of the result are cleared.
+            if (simdOp.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(simdOp.Rd), result);
+        }
+
+        /// <summary>
+        /// Reverses the element order inside every 64-bit group of Rn (byte elements for op.Size 0,
+        /// 16-bit elements for op.Size 1, 32-bit elements otherwise). With SSSE3 this is a single
+        /// PSHUFB driven by a constant byte-index mask; otherwise <c>EmitRev_V</c> is used.
+        /// </summary>
+        public static void Rev64_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSsse3)
+            {
+                EmitRev_V(context, containerSize: 3);
+
+                return;
+            }
+
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+            Operand    source = GetVec(simdOp.Rn);
+
+            long maskLow, maskHigh;
+
+            switch (simdOp.Size)
+            {
+                case 0:
+                    // Byte elements: all eight bytes of each 64-bit group are reversed.
+                    maskLow  = 00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0;
+                    maskHigh = 08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0;
+                    break;
+
+                case 1:
+                    // 16-bit elements: the four byte pairs of each 64-bit group are reversed.
+                    maskLow  = 01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0;
+                    maskHigh = 09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0;
+                    break;
+
+                default:
+                    // Remaining size (2): the two 32-bit halves of each 64-bit group trade places.
+                    maskLow  = 03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0;
+                    maskHigh = 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0;
+                    break;
+            }
+
+            // The low mask goes into 64-bit element 0 of the control vector, the high mask into element 1.
+            Operand shuffle = EmitVectorInsert(context, X86GetScalar(context, maskLow), Const(maskHigh), 1, 3);
+            Operand result  = context.AddIntrinsic(Intrinsic.X86Pshufb, source, shuffle);
+
+            // For the Simd64 register size the upper 64 bits of the result are cleared.
+            if (simdOp.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(simdOp.Rd), result);
+        }
+
+        /// <summary>
+        /// Generic element reversal: rebuilds Rd element by element, reading each element from the index
+        /// obtained by XOR-ing its destination index with a mask of (containerSize - op.Size) low bits.
+        /// </summary>
+        private static void EmitRev_V(ArmEmitterContext context, int containerSize)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+            Operand    result = context.VectorZero();
+
+            int elemSize  = simdOp.Size;
+            int elemCount = simdOp.GetBytesCount() >> elemSize;
+            int flipMask  = (1 << (containerSize - elemSize)) - 1;
+
+            for (int dst = 0; dst < elemCount; dst++)
+            {
+                Operand element = EmitVectorExtractZx(context, simdOp.Rn, dst ^ flipMask, elemSize);
+                result = EmitVectorInsert(context, result, element, dst, elemSize);
+            }
+
+            context.Copy(GetVec(simdOp.Rd), result);
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdMemory.cs b/ARMeilleure/Instructions/InstEmitSimdMemory.cs
new file mode 100644
index 000000000..22e9ef7a8
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdMemory.cs
@@ -0,0 +1,160 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System.Diagnostics;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitMemoryHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        /// <summary>Load entry point that forwards to <c>EmitSimdMemMs</c> with <c>isLoad</c> set.</summary>
+        /// <param name="context">Emitter context holding the current opcode.</param>
+        public static void Ld__Vms(ArmEmitterContext context)
+            => EmitSimdMemMs(context, isLoad: true);
+
+        /// <summary>Load entry point that forwards to <c>EmitSimdMemSs</c> with <c>isLoad</c> set.</summary>
+        /// <param name="context">Emitter context holding the current opcode.</param>
+        public static void Ld__Vss(ArmEmitterContext context)
+            => EmitSimdMemSs(context, isLoad: true);
+
+        /// <summary>Store entry point that forwards to <c>EmitSimdMemMs</c> with <c>isLoad</c> cleared.</summary>
+        /// <param name="context">Emitter context holding the current opcode.</param>
+        public static void St__Vms(ArmEmitterContext context)
+            => EmitSimdMemMs(context, isLoad: false);
+
+        /// <summary>Store entry point that forwards to <c>EmitSimdMemSs</c> with <c>isLoad</c> cleared.</summary>
+        /// <param name="context">Emitter context holding the current opcode.</param>
+        public static void St__Vss(ArmEmitterContext context)
+            => EmitSimdMemSs(context, isLoad: false);
+
+        /// <summary>
+        /// Emits one element access per (rep, elem, sElem) step at consecutive offsets from Rn, using lane
+        /// "elem" of register Rt + rep + sElem (modulo 32), then performs write-back if requested.
+        /// </summary>
+        private static void EmitSimdMemMs(ArmEmitterContext context, bool isLoad)
+        {
+            OpCodeSimdMemMs memOp    = (OpCodeSimdMemMs)context.CurrOp;
+            Operand         baseAddr = GetIntOrSP(context, memOp.Rn);
+
+            int  elemBytes = 1 << memOp.Size;
+            bool halfWidth = memOp.RegisterSize == RegisterSize.Simd64;
+            long offset    = 0;
+
+            for (int rep  = 0; rep  < memOp.Reps;  rep++)
+            for (int lane = 0; lane < memOp.Elems; lane++)
+            for (int sElem = 0; sElem < memOp.SElems; sElem++, offset += elemBytes)
+            {
+                int     regIndex = (memOp.Rt + rep + sElem) & 0x1f;
+                Operand reg      = GetVec(regIndex);
+                Operand address  = context.Add(baseAddr, Const(offset));
+
+                if (!isLoad)
+                {
+                    EmitStoreSimd(context, address, regIndex, lane, memOp.Size);
+                    continue;
+                }
+
+                EmitLoadSimd(context, address, reg, regIndex, lane, memOp.Size);
+
+                if (halfWidth && lane == memOp.Elems - 1)
+                {
+                    context.Copy(reg, context.VectorZeroUpper64(reg));
+                }
+            }
+
+            if (memOp.WBack)
+            {
+                EmitSimdMemWBack(context, offset);
+            }
+        }
+
+        /// <summary>
+        /// Emits one element access for each of the op.SElems registers starting at Rt (the register
+        /// index wraps modulo 32), at consecutive offsets from base register Rn. In replicate mode every
+        /// lane of the register is loaded from that element's address; otherwise only lane op.Index is
+        /// loaded or stored. Write-back follows if the opcode requests it.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current opcode.</param>
+        /// <param name="isLoad">True to emit loads, false to emit stores.</param>
+        private static void EmitSimdMemSs(ArmEmitterContext context, bool isLoad)
+        {
+            OpCodeSimdMemSs memOp = (OpCodeSimdMemSs)context.CurrOp;
+
+            Operand baseAddr = GetIntOrSP(context, memOp.Rn);
+
+            if (memOp.Replicate)
+            {
+                // Only loads use the replicate mode.
+                Debug.Assert(isLoad, "Replicate mode is not valid for stores.");
+            }
+
+            // The lane count is only needed, and only computed, in replicate mode.
+            int laneCount = memOp.Replicate ? memOp.GetBytesCount() >> memOp.Size : 0;
+
+            // Size in bytes of one element; the offset advances by this much per register.
+            int  elemBytes = 1 << memOp.Size;
+            long offset    = 0;
+
+            for (int sElem = 0; sElem < memOp.SElems; sElem++, offset += elemBytes)
+            {
+                int regIndex = (memOp.Rt + sElem) & 0x1f;
+
+                Operand reg     = GetVec(regIndex);
+                Operand address = context.Add(baseAddr, Const(offset));
+
+                if (memOp.Replicate)
+                {
+                    // Every lane is loaded from the same address.
+                    for (int lane = 0; lane < laneCount; lane++)
+                    {
+                        EmitLoadSimd(context, address, reg, regIndex, lane, memOp.Size);
+                    }
+
+                    // Simd64 register size: clear the upper 64 bits once the lanes are filled.
+                    if (memOp.RegisterSize == RegisterSize.Simd64)
+                    {
+                        context.Copy(reg, context.VectorZeroUpper64(reg));
+                    }
+                }
+                else if (isLoad)
+                {
+                    EmitLoadSimd(context, address, reg, regIndex, memOp.Index, memOp.Size);
+                }
+                else
+                {
+                    EmitStoreSimd(context, address, regIndex, memOp.Index, memOp.Size);
+                }
+            }
+
+            // The base register is updated after all accesses have been emitted.
+            if (memOp.WBack)
+            {
+                EmitSimdMemWBack(context, offset);
+            }
+        }
+
+        /// <summary>
+        /// Base register update after the accesses: adds Rm to Rn, or adds
+        /// <paramref name="offset"/> when Rm is the zero-register alias.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current opcode.</param>
+        /// <param name="offset">Immediate increment used when Rm is the zero-register alias.</param>
+        private static void EmitSimdMemWBack(ArmEmitterContext context, long offset)
+        {
+            OpCodeMemReg memOp = (OpCodeMemReg)context.CurrOp;
+
+            Operand baseReg = GetIntOrSP(context, memOp.Rn);
+
+            Operand increment = memOp.Rm == RegisterAlias.Zr
+                ? Const(offset)
+                : GetIntOrZR(context, memOp.Rm);
+
+            context.Copy(baseReg, context.Add(baseReg, increment));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdMove.cs b/ARMeilleure/Instructions/InstEmitSimdMove.cs
new file mode 100644
index 000000000..47359161f
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdMove.cs
@@ -0,0 +1,794 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+#region "Masks"
+        private static readonly long[] _masksE0_TrnUzpXtn = new long[] // PSHUFB byte-index masks, indexed by op.Size (0..2)
+        {
+            14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, // bytes 0, 2, ..., 14: even-numbered 8-bit elements
+            13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, // bytes of the even-numbered 16-bit elements
+            11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0  // bytes of the even-numbered 32-bit elements
+        };
+
+        private static readonly long[] _masksE1_TrnUzp = new long[] // counterpart of the table above for the odd-numbered elements
+        {
+            15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, // bytes 1, 3, ..., 15: odd-numbered 8-bit elements
+            15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, // bytes of the odd-numbered 16-bit elements
+            15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0  // bytes of the odd-numbered 32-bit elements
+        };
+
+        private static readonly long[] _masksE0_Uzp = new long[] // two entries; byte indices listed below from lowest to highest byte
+        {
+            13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0, // bytes 0, 4, 8, 12, then 1, 5, 9, 13
+            11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0  // bytes 0, 1, 8, 9, then 2, 3, 10, 11
+        };
+
+        private static readonly long[] _masksE1_Uzp = new long[] // two entries; byte indices listed below from lowest to highest byte
+        {
+            15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0, // bytes 2, 6, 10, 14, then 3, 7, 11, 15
+            15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0  // bytes 4, 5, 12, 13, then 6, 7, 14, 15
+        };
+#endregion
+
+        /// <summary>
+        /// Broadcasts general-purpose register Rn into the elements (of size op.Size) of Rd. With SSE2
+        /// the value is replicated by multiplication and shuffles; otherwise it is inserted element by element.
+        /// </summary>
+        public static void Dup_Gp(ArmEmitterContext context)
+        {
+            OpCodeSimdIns insOp = (OpCodeSimdIns)context.CurrOp;
+            Operand       value = GetIntOrZR(context, insOp.Rn);
+
+            if (!Optimizations.UseSse2)
+            {
+                // Generic path: write the value into every element of a zeroed vector.
+                Operand filled = context.VectorZero();
+                int     count  = insOp.GetBytesCount() >> insOp.Size;
+
+                for (int i = 0; i < count; i++)
+                {
+                    filled = EmitVectorInsert(context, filled, value, i, insOp.Size);
+                }
+
+                context.Copy(GetVec(insOp.Rd), filled);
+                return;
+            }
+
+            // 8-bit and 16-bit values are replicated across 32 bits; 32-bit values are only zero-extended.
+            if (insOp.Size == 0)
+            {
+                value = context.ZeroExtend8(value.Type, value);
+                value = context.Multiply(value, Const(value.Type, 0x01010101));
+            }
+            else if (insOp.Size == 1)
+            {
+                value = context.ZeroExtend16(value.Type, value);
+                value = context.Multiply(value, Const(value.Type, 0x00010001));
+            }
+            else if (insOp.Size == 2)
+            {
+                value = context.ZeroExtend32(value.Type, value);
+            }
+
+            Operand res = context.VectorInsert(context.VectorZero(), value, 0);
+
+            // SHUFPS selector 0 copies 32-bit lane 0 to all four lanes; 0xf0 fills only the two low lanes (Simd64).
+            res = insOp.Size < 3
+                ? context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(insOp.RegisterSize == RegisterSize.Simd64 ? 0xf0 : 0))
+                : context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
+
+            context.Copy(GetVec(insOp.Rd), res);
+        }
+
+        /// <summary>Copies element <c>DstIndex</c> of Rn into element 0 of an otherwise zeroed Rd.</summary>
+        public static void Dup_S(ArmEmitterContext context)
+        {
+            OpCodeSimdIns insOp  = (OpCodeSimdIns)context.CurrOp;
+            Operand       picked = EmitVectorExtractZx(context, insOp.Rn, insOp.DstIndex, insOp.Size);
+            Operand       dest   = GetVec(insOp.Rd);
+            context.Copy(dest, EmitVectorInsert(context, context.VectorZero(), picked, 0, insOp.Size));
+        }
+
+        public static void Dup_V(ArmEmitterContext context)
+        {
+            OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
+            // Broadcasts element DstIndex (of size op.Size) of Rn into the elements of Rd.
+            if (Optimizations.UseSse2)
+            {
+                Operand res = GetVec(op.Rn);
+                // SSE2 path: bring the selected element to the bottom, then duplicate it upwards.
+                if (op.Size == 0)
+                {
+                    if (op.DstIndex != 0)
+                    {
+                        res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex));
+                    }
+                    // Byte 0 is doubled to 2 bytes, then to 4 bytes, then that 32-bit lane is copied to all four lanes.
+                    res = context.AddIntrinsic(Intrinsic.X86Punpcklbw, res, res);
+                    res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
+                    res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+                }
+                else if (op.Size == 1)
+                {
+                    if (op.DstIndex != 0)
+                    {
+                        res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex * 2));
+                    }
+                    // The low 16 bits are doubled into a 32-bit lane, then that lane is copied to all four lanes.
+                    res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
+                    res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
+                }
+                else if (op.Size == 2)
+                {
+                    int mask = op.DstIndex * 0b01010101;
+                    // The same 2-bit lane index repeated four times selects that lane for every result lane.
+                    res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(mask));
+                }
+                else if (op.DstIndex == 0 && op.RegisterSize != RegisterSize.Simd64)
+                {
+                    res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
+                }
+                else if (op.DstIndex == 1)
+                {
+                    res = context.AddIntrinsic(Intrinsic.X86Movhlps, res, res);
+                }
+                // For the Simd64 register size only the lower 64 bits of the result are kept.
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
+                // Generic path: insert the extracted element into every element of a zeroed vector.
+                Operand res = context.VectorZero();
+
+                int elems = op.GetBytesCount() >> op.Size;
+
+                for (int index = 0; index < elems; index++)
+                {
+                    res = EmitVectorInsert(context, res, ne, index, op.Size);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+        }
+
+        public static void Ext_V(ArmEmitterContext context)
+        {
+            OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp;
+            // Builds Rd from consecutive bytes of Rn followed by Rm, starting at byte op.Imm4 of Rn.
+            if (Optimizations.UseSse2)
+            {
+                Operand nShifted = GetVec(op.Rn);
+                // For the Simd64 register size, clear the upper half first so the shift brings in zeros.
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    nShifted = context.AddIntrinsic(Intrinsic.X86Movlhps, nShifted, context.VectorZero());
+                }
+                // Drop the low Imm4 bytes of Rn.
+                nShifted = context.AddIntrinsic(Intrinsic.X86Psrldq, nShifted, Const(op.Imm4));
+
+                Operand mShifted = GetVec(op.Rm);
+                // Move the low bytes of Rm up so that they follow the remaining bytes of Rn.
+                mShifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mShifted, Const(op.GetBytesCount() - op.Imm4));
+                // For the Simd64 register size, discard whatever was shifted into the upper half.
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    mShifted = context.AddIntrinsic(Intrinsic.X86Movlhps, mShifted, context.VectorZero());
+                }
+
+                Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, mShifted);
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                Operand res = context.VectorZero();
+                // Generic path: copy one byte at a time, continuing in Rm once Rn is exhausted.
+                int bytes = op.GetBytesCount();
+
+                int position = op.Imm4 & (bytes - 1);
+
+                for (int index = 0; index < bytes; index++)
+                {
+                    int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm;
+
+                    Operand e = EmitVectorExtractZx(context, reg, position, 0);
+                    // The source position wraps around at the register's byte count.
+                    position = (position + 1) & (bytes - 1);
+
+                    res = EmitVectorInsert(context, res, e, index, 0);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+        }
+
+        /// <summary>
+        /// Conditional scalar select: Rd receives the scalar in element 0 of Rn when the opcode's
+        /// condition holds and that of Rm otherwise; the remaining bits of Rd are zeroed.
+        /// </summary>
+        public static void Fcsel_S(ArmEmitterContext context)
+        {
+            OpCodeSimdFcond condOp = (OpCodeSimdFcond)context.CurrOp;
+
+            Operand takeRn = Label();
+            Operand done   = Label();
+
+            context.BranchIfTrue(takeRn, InstEmitFlowHelper.GetCondTrue(context, condOp.Cond));
+
+            OperandType scalarType = condOp.Size != 0 ? OperandType.FP64 : OperandType.FP32;
+
+            // Condition false: take the scalar from Rm, then skip the other arm.
+            Operand fromRm = context.VectorExtract(scalarType, GetVec(condOp.Rm), 0);
+            context.Copy(GetVec(condOp.Rd), context.VectorInsert(context.VectorZero(), fromRm, 0));
+            context.Branch(done);
+
+            // Condition true: take the scalar from Rn.
+            context.MarkLabel(takeRn);
+            Operand fromRn = context.VectorExtract(scalarType, GetVec(condOp.Rn), 0);
+            context.Copy(GetVec(condOp.Rd), context.VectorInsert(context.VectorZero(), fromRn, 0));
+
+            context.MarkLabel(done);
+        }
+
+        /// <summary>Moves element 0 of Rn, read with element size <c>op.Size + 2</c>, into integer register Rd.</summary>
+        public static void Fmov_Ftoi(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            // The extract happens first; its result is then written to Rd (or discarded for ZR).
+            SetIntOrZR(context, simdOp.Rd, EmitVectorExtractZx(context, simdOp.Rn, 0, simdOp.Size + 2));
+        }
+
+        /// <summary>Moves 64-bit element 1 (the upper half) of Rn into integer register Rd.</summary>
+        public static void Fmov_Ftoi1(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            // Element index 1 with size 3 addresses the second 64-bit element.
+            SetIntOrZR(context, simdOp.Rd, EmitVectorExtractZx(context, simdOp.Rn, 1, 3));
+        }
+
+        /// <summary>Writes integer register Rn into element 0 (size <c>op.Size + 2</c>) of a zeroed vector stored to Rd.</summary>
+        public static void Fmov_Itof(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+            Operand    source = GetIntOrZR(context, simdOp.Rn);
+            Operand    dest   = GetVec(simdOp.Rd);
+            context.Copy(dest, EmitVectorInsert(context, context.VectorZero(), source, 0, simdOp.Size + 2));
+        }
+
+        /// <summary>Writes integer register Rn into 64-bit element 1 of Rd, keeping the rest of Rd.</summary>
+        public static void Fmov_Itof1(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+            Operand    source = GetIntOrZR(context, simdOp.Rn);
+            Operand    dest   = GetVec(simdOp.Rd);
+            context.Copy(dest, EmitVectorInsert(context, GetVec(simdOp.Rd), source, 1, 3));
+        }
+
+        /// <summary>Copies the FP32 (op.Size 0) or FP64 scalar in element 0 of Rn into a zeroed vector stored to Rd.</summary>
+        public static void Fmov_S(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            OperandType scalarType = simdOp.Size != 0 ? OperandType.FP64 : OperandType.FP32;
+            Operand     scalar     = context.VectorExtract(scalarType, GetVec(simdOp.Rn), 0);
+
+            context.Copy(GetVec(simdOp.Rd), context.VectorInsert(context.VectorZero(), scalar, 0));
+        }
+
+        /// <summary>Stores the opcode immediate to Rd as a scalar built by <c>X86GetScalar</c>.</summary>
+        public static void Fmov_Si(ArmEmitterContext context)
+        {
+            OpCodeSimdFmov fmovOp = (OpCodeSimdFmov)context.CurrOp;
+
+            Operand dest = GetVec(fmovOp.Rd);
+
+            // Size 0 passes the immediate truncated to int; any other size passes it unchanged.
+            Operand scalar = fmovOp.Size != 0
+                ? X86GetScalar(context, fmovOp.Immediate)
+                : X86GetScalar(context, (int)fmovOp.Immediate);
+            context.Copy(dest, scalar);
+        }
+
+        /// <summary>Fills the leading elements (size <c>op.Size + 2</c>) of a zeroed vector with the opcode immediate and stores it to Rd.</summary>
+        public static void Fmov_Vi(ArmEmitterContext context)
+        {
+            OpCodeSimdImm immOp = (OpCodeSimdImm)context.CurrOp;
+
+            Operand imm    = Const(immOp.Immediate);
+            Operand result = context.VectorZero();
+
+            // 4 elements for the Simd128 register size, 2 otherwise, shifted right by op.Size.
+            int count = (immOp.RegisterSize != RegisterSize.Simd128 ? 2 : 4) >> immOp.Size;
+
+            for (int i = 0; i < count; i++)
+            {
+                result = EmitVectorInsert(context, result, imm, i, immOp.Size + 2);
+            }
+            context.Copy(GetVec(immOp.Rd), result);
+        }
+
+        /// <summary>Inserts integer register Rn into element <c>DstIndex</c> of Rd, keeping the rest of Rd.</summary>
+        public static void Ins_Gp(ArmEmitterContext context)
+        {
+            OpCodeSimdIns insOp = (OpCodeSimdIns)context.CurrOp;
+
+            Operand dest    = GetVec(insOp.Rd);
+            Operand updated = EmitVectorInsert(context, dest, GetIntOrZR(context, insOp.Rn), insOp.DstIndex, insOp.Size);
+            context.Copy(dest, updated);
+        }
+
+        /// <summary>Copies element <c>SrcIndex</c> of Rn into element <c>DstIndex</c> of Rd, keeping the rest of Rd.</summary>
+        public static void Ins_V(ArmEmitterContext context)
+        {
+            OpCodeSimdIns insOp = (OpCodeSimdIns)context.CurrOp;
+
+            Operand dest    = GetVec(insOp.Rd);
+            Operand element = EmitVectorExtractZx(context, insOp.Rn, insOp.SrcIndex, insOp.Size);
+            context.Copy(dest, EmitVectorInsert(context, dest, element, insOp.DstIndex, insOp.Size));
+        }
+
+        /// <summary>Emits MOVI: <c>EmitMoviMvni</c> without inversion under SSE2, else an identity immediate unary op.</summary>
+        public static void Movi_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorImmUnaryOp(context, (value) => value);
+
+                return;
+            }
+            EmitMoviMvni(context, not: false);
+        }
+
+        /// <summary>Emits MVNI: <c>EmitMoviMvni</c> with inversion under SSE2, else a bitwise-NOT immediate unary op.</summary>
+        public static void Mvni_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSse2)
+            {
+                EmitVectorImmUnaryOp(context, (value) => context.BitwiseNot(value));
+
+                return;
+            }
+            EmitMoviMvni(context, not: true);
+        }
+
+        /// <summary>Moves element <c>DstIndex</c> of Rn, extracted with <c>EmitVectorExtractSx</c>, into integer register Rd.</summary>
+        public static void Smov_S(ArmEmitterContext context)
+        {
+            OpCodeSimdIns insOp   = (OpCodeSimdIns)context.CurrOp;
+            Operand       element = EmitVectorExtractSx(context, insOp.Rn, insOp.DstIndex, insOp.Size);
+
+            // For the Simd64 register size the extracted value is zero-extended from 32 bits.
+            if (insOp.RegisterSize == RegisterSize.Simd64)
+            {
+                element = context.ZeroExtend32(OperandType.I64, element);
+            }
+            SetIntOrZR(context, insOp.Rd, element);
+        }
+
+        public static void Tbl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
+            // Table lookup: the bytes of Rm index into the op.Size consecutive table registers starting at Rn.
+            if (Optimizations.UseSsse3)
+            {
+                Operand n = GetVec(op.Rn);
+                Operand m = GetVec(op.Rm);
+                // 0x0F in every byte: the highest index that falls inside one 16-byte table register.
+                Operand mask = X86GetAllElements(context, 0x0F0F0F0F0F0F0F0FL);
+                // Indices above 15 (signed byte compare) get all bits set, which makes PSHUFB produce zero there.
+                Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask);
+
+                mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m);
+
+                Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask);
+
+                for (int index = 1; index < op.Size; index++)
+                {
+                    Operand ni = GetVec((op.Rn + index) & 0x1f);
+                    // Rebase the indices by 16 * index so that this table register covers them as 0..15.
+                    Operand indexMask = X86GetAllElements(context, 0x1010101010101010L * index);
+
+                    Operand mMinusMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, indexMask);
+
+                    Operand mMask2 = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mMinusMask, mask);
+
+                    mMask2 = context.AddIntrinsic(Intrinsic.X86Por, mMask2, mMinusMask);
+
+                    Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask2);
+                    // The partial lookup for this table register is merged into the result with OR.
+                    res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
+                }
+
+                if (op.RegisterSize == RegisterSize.Simd64)
+                {
+                    res = context.VectorZeroUpper64(res);
+                }
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                Operand[] args = new Operand[1 + op.Size];
+                // args[0] is the index vector; the table registers follow in order.
+                args[0] = GetVec(op.Rm);
+
+                for (int index = 0; index < op.Size; index++)
+                {
+                    args[1 + index] = GetVec((op.Rn + index) & 0x1f);
+                }
+                // Pick the SoftFallback helper that matches the table register count and register size.
+                Delegate dlg = null;
+
+                switch (op.Size)
+                {
+                    case 1: dlg = op.RegisterSize == RegisterSize.Simd64
+                        ? (Delegate)new _V128_V128_V128(SoftFallback.Tbl1_V64)
+                        : (Delegate)new _V128_V128_V128(SoftFallback.Tbl1_V128); break;
+
+                    case 2: dlg = op.RegisterSize == RegisterSize.Simd64
+                        ? (Delegate)new _V128_V128_V128_V128(SoftFallback.Tbl2_V64)
+                        : (Delegate)new _V128_V128_V128_V128(SoftFallback.Tbl2_V128); break;
+
+                    case 3: dlg = op.RegisterSize == RegisterSize.Simd64
+                        ? (Delegate)new _V128_V128_V128_V128_V128(SoftFallback.Tbl3_V64)
+                        : (Delegate)new _V128_V128_V128_V128_V128(SoftFallback.Tbl3_V128); break;
+
+                    case 4: dlg = op.RegisterSize == RegisterSize.Simd64
+                        ? (Delegate)new _V128_V128_V128_V128_V128_V128(SoftFallback.Tbl4_V64)
+                        : (Delegate)new _V128_V128_V128_V128_V128_V128(SoftFallback.Tbl4_V128); break;
+                }
+                // NOTE(review): dlg stays null for any op.Size outside 1..4 - confirm the decoder rules that out.
+                context.Copy(GetVec(op.Rd), context.Call(dlg, args));
+            }
+        }
+
+        /// <summary>Entry point that forwards to <c>EmitVectorTranspose</c> with <c>part</c> 0.</summary>
+        /// <param name="context">Emitter context holding the current opcode.</param>
+        public static void Trn1_V(ArmEmitterContext context)
+            => EmitVectorTranspose(context, part: 0);
+
+        /// <summary>Entry point that forwards to <c>EmitVectorTranspose</c> with <c>part</c> 1.</summary>
+        /// <param name="context">Emitter context holding the current opcode.</param>
+        public static void Trn2_V(ArmEmitterContext context)
+            => EmitVectorTranspose(context, part: 1);
+
+        /// <summary>Moves element <c>DstIndex</c> of Rn, extracted with <c>EmitVectorExtractZx</c>, into integer register Rd.</summary>
+        public static void Umov_S(ArmEmitterContext context)
+        {
+            OpCodeSimdIns insOp = (OpCodeSimdIns)context.CurrOp;
+
+            Operand element = EmitVectorExtractZx(context, insOp.Rn, insOp.DstIndex, insOp.Size);
+            SetIntOrZR(context, insOp.Rd, element);
+        }
+
+        /// <summary>Entry point that forwards to <c>EmitVectorUnzip</c> with <c>part</c> 0.</summary>
+        /// <param name="context">Emitter context holding the current opcode.</param>
+        public static void Uzp1_V(ArmEmitterContext context)
+            => EmitVectorUnzip(context, part: 0);
+
+        /// <summary>Entry point that forwards to <c>EmitVectorUnzip</c> with <c>part</c> 1.</summary>
+        /// <param name="context">Emitter context holding the current opcode.</param>
+        public static void Uzp2_V(ArmEmitterContext context)
+            => EmitVectorUnzip(context, part: 1);
+
+        /// <summary>
+        /// Narrows each element of Rn to half its width and writes the results to the lower 64 bits of
+        /// Rd (upper half zeroed), or to the upper 64 bits (lower half kept) for the Simd128 register size.
+        /// </summary>
+        public static void Xtn_V(ArmEmitterContext context)
+        {
+            OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;
+
+            if (!Optimizations.UseSsse3)
+            {
+                int count = 8 >> simdOp.Size;
+                int first = simdOp.RegisterSize == RegisterSize.Simd128 ? count : 0;
+
+                // Writing to the upper half starts from a copy of Rd; otherwise from a zeroed vector.
+                Operand built = first != 0 ? context.Copy(GetVec(simdOp.Rd)) : context.VectorZero();
+
+                for (int i = 0; i < count; i++)
+                {
+                    Operand wide = EmitVectorExtractZx(context, simdOp.Rn, i, simdOp.Size + 1);
+                    built = EmitVectorInsert(context, built, wide, first + i, simdOp.Size);
+                }
+
+                context.Copy(GetVec(simdOp.Rd), built);
+
+                return;
+            }
+
+            // MOVLHPS with a zero vector keeps the low 64 bits of Rd and clears the high 64 bits.
+            Operand kept = context.AddIntrinsic(Intrinsic.X86Movlhps, GetVec(simdOp.Rd), context.VectorZero());
+
+            // The same mask is placed in both halves, so both halves of the shuffle hold the narrowed data.
+            Operand source   = GetVec(simdOp.Rn);
+            Operand shuffle  = X86GetAllElements(context, _masksE0_TrnUzpXtn[simdOp.Size]);
+            Operand narrowed = context.AddIntrinsic(Intrinsic.X86Pshufb, source, shuffle);
+
+            // MOVLHPS puts the narrowed data in the upper half; MOVHLPS puts it in the lower half.
+            Intrinsic combine = simdOp.RegisterSize != RegisterSize.Simd128 ? Intrinsic.X86Movhlps : Intrinsic.X86Movlhps;
+
+            Operand result = context.AddIntrinsic(combine, kept, narrowed);
+
+            context.Copy(GetVec(simdOp.Rd), result);
+        }
+
+        /// <summary>Entry point that forwards to <c>EmitVectorZip</c> with <c>part</c> 0.</summary>
+        /// <param name="context">Emitter context holding the current opcode.</param>
+        public static void Zip1_V(ArmEmitterContext context)
+            => EmitVectorZip(context, part: 0);
+
+        /// <summary>Entry point that forwards to <c>EmitVectorZip</c> with <c>part</c> 1.</summary>
+        /// <param name="context">Emitter context holding the current opcode.</param>
+        public static void Zip2_V(ArmEmitterContext context)
+            => EmitVectorZip(context, part: 1);
+
+        /// <summary>
+        /// SSE2 path shared by MOVI and MVNI: expands the opcode immediate (optionally inverted) with
+        /// <c>X86GetAllElements</c> and stores it to Rd, clearing the upper 64 bits for the Simd64 size.
+        /// </summary>
+        /// <param name="not">True to invert the expanded immediate before it is materialized.</param>
+        private static void EmitMoviMvni(ArmEmitterContext context, bool not)
+        {
+            OpCodeSimdImm immOp = (OpCodeSimdImm)context.CurrOp;
+
+            long imm = immOp.Immediate;
+
+            // Sizes 0 and 1 are multiplied by a replication pattern; larger sizes are used as they are.
+            if (immOp.Size == 0)
+            {
+                imm *= 0x01010101;
+            }
+            else if (immOp.Size == 1)
+            {
+                imm *= 0x00010001;
+            }
+
+            if (not)
+            {
+                imm = ~imm;
+            }
+
+            // Sizes below 3 use the int overload (immediate truncated to 32 bits), size 3 the long overload.
+            Operand pattern = immOp.Size < 3 ? X86GetAllElements(context, (int)imm) : X86GetAllElements(context, imm);
+
+            if (immOp.RegisterSize == RegisterSize.Simd64)
+            {
+                pattern = context.VectorZeroUpper64(pattern);
+            }
+            context.Copy(GetVec(immOp.Rd), pattern);
+        }
+
+        /// <summary>
+        /// Shared emitter for Trn1_V and Trn2_V: Rd receives, alternately, the even-numbered (part 0)
+        /// or odd-numbered (part 1) elements of Rn and Rm.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current opcode.</param>
+        /// <param name="part">0 to take the even-numbered elements, 1 to take the odd-numbered ones.</param>
+        private static void EmitVectorTranspose(ArmEmitterContext context, int part)
+        {
+            OpCodeSimdReg regOp = (OpCodeSimdReg)context.CurrOp;
+
+            if (!Optimizations.UseSsse3)
+            {
+                Operand built = context.VectorZero();
+
+                int pairCount = regOp.GetPairsCount() >> regOp.Size;
+
+                for (int pair = 0; pair < pairCount; pair++)
+                {
+                    int even = pair << 1;
+
+                    Operand fromRn = EmitVectorExtractZx(context, regOp.Rn, even + part, regOp.Size);
+                    Operand fromRm = EmitVectorExtractZx(context, regOp.Rm, even + part, regOp.Size);
+
+                    built = EmitVectorInsert(context, built, fromRn, even,     regOp.Size);
+                    built = EmitVectorInsert(context, built, fromRm, even + 1, regOp.Size);
+                }
+
+                context.Copy(GetVec(regOp.Rd), built);
+
+                return;
+            }
+
+            Operand n;
+            Operand m;
+
+            if (regOp.Size < 3)
+            {
+                // Gather the even-numbered elements in the low 64 bits and the odd-numbered ones in the high 64 bits.
+                long low  = _masksE0_TrnUzpXtn[regOp.Size];
+                long high = _masksE1_TrnUzp   [regOp.Size];
+
+                Operand shuffle = EmitVectorInsert(context, X86GetScalar(context, low), Const(high), 1, 3);
+
+                n = context.AddIntrinsic(Intrinsic.X86Pshufb, GetVec(regOp.Rn), shuffle);
+                m = context.AddIntrinsic(Intrinsic.X86Pshufb, GetVec(regOp.Rm), shuffle);
+            }
+            else
+            {
+                // 64-bit elements are used without any pre-shuffle.
+                n = GetVec(regOp.Rn);
+                m = GetVec(regOp.Rm);
+            }
+
+            // The low-half unpack interleaves the gathered even elements, the high-half unpack the odd ones.
+            Intrinsic unpack = part != 0 ? X86PunpckhInstruction[regOp.Size] : X86PunpcklInstruction[regOp.Size];
+
+            Operand result = context.AddIntrinsic(unpack, n, m);
+
+            if (regOp.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(regOp.Rd), result);
+        }
+
+        private static void EmitVectorUnzip(ArmEmitterContext context, int part)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (Optimizations.UseSsse3)
+            {
+                if (op.RegisterSize == RegisterSize.Simd128)
+                {
+                    Operand shuffleMask = null;
+
+                    if (op.Size < 3)
+                    {
+                        long maskLow  = _masksE0_TrnUzpXtn[op.Size];
+                        long maskHigh = _masksE1_TrnUzp   [op.Size];
+
+                        shuffleMask = X86GetScalar(context, maskLow);
+                        // The second mask half is inserted as 64-bit element 1.
+                        shuffleMask = EmitVectorInsert(context, shuffleMask, Const(maskHigh), 1, 3);
+                    }
+
+                    Operand nVec = GetVec(op.Rn);
+
+                    if (op.Size < 3)
+                    {
+                        nVec = context.AddIntrinsic(Intrinsic.X86Pshufb, nVec, shuffleMask);
+                    }
+
+                    Operand mVec = GetVec(op.Rm);
+
+                    if (op.Size < 3)
+                    {
+                        mVec = context.AddIntrinsic(Intrinsic.X86Pshufb, mVec, shuffleMask);
+                    }
+                    // part 0 joins the low 64-bit halves (PUNPCKLQDQ), anything else the high halves (PUNPCKHQDQ).
+                    Intrinsic joinInst = part == 0
+                        ? Intrinsic.X86Punpcklqdq
+                        : Intrinsic.X86Punpckhqdq;
+
+                    Operand joined = context.AddIntrinsic(joinInst, nVec, mVec);
+
+                    context.Copy(GetVec(op.Rd), joined);
+                }
+                else
+                {
+                    Operand nVec = GetVec(op.Rn);
+                    Operand mVec = GetVec(op.Rm);
+                    // 64-bit registers: start by interleaving the low halves of Rn and Rm.
+                    Intrinsic interleaveInst = X86PunpcklInstruction[op.Size];
+
+                    Operand joined = context.AddIntrinsic(interleaveInst, nVec, mVec);
+
+                    if (op.Size < 2)
+                    {
+                        long maskLow  = _masksE0_Uzp[op.Size];
+                        long maskHigh = _masksE1_Uzp[op.Size];
+
+                        Operand shuffleMask = X86GetScalar(context, maskLow);
+
+                        shuffleMask = EmitVectorInsert(context, shuffleMask, Const(maskHigh), 1, 3);
+
+                        joined = context.AddIntrinsic(Intrinsic.X86Pshufb, joined, shuffleMask);
+                    }
+                    // Move the selected 64-bit half into the low lane; the upper lane comes from a zero vector.
+                    Intrinsic pickInst = part == 0
+                        ? Intrinsic.X86Punpcklqdq
+                        : Intrinsic.X86Punpckhqdq;
+
+                    joined = context.AddIntrinsic(pickInst, joined, context.VectorZero());
+
+                    context.Copy(GetVec(op.Rd), joined);
+                }
+            }
+            else
+            {
+                Operand result = context.VectorZero();
+
+                int pairCount = op.GetPairsCount() >> op.Size;
+
+                for (int pair = 0; pair < pairCount; pair++)
+                {
+                    int sourceIndex = (pair << 1) + part;
+
+                    Operand nElem = EmitVectorExtractZx(context, op.Rn, sourceIndex, op.Size);
+                    Operand mElem = EmitVectorExtractZx(context, op.Rm, sourceIndex, op.Size);
+
+                    result = EmitVectorInsert(context, result, nElem,             pair, op.Size);
+                    result = EmitVectorInsert(context, result, mElem, pairCount + pair, op.Size);
+                }
+
+                context.Copy(GetVec(op.Rd), result);
+            }
+        }
+
+        private static void EmitVectorZip(ArmEmitterContext context, int part)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            if (Optimizations.UseSse2)
+            {
+                Operand nVec = GetVec(op.Rn);
+                Operand mVec = GetVec(op.Rm);
+
+                if (op.RegisterSize == RegisterSize.Simd128)
+                {
+                    Intrinsic interleaveInst = part == 0
+                        ? X86PunpcklInstruction[op.Size]
+                        : X86PunpckhInstruction[op.Size];
+                    // Full-width registers: a single low/high unpack produces the result.
+                    Operand zipped = context.AddIntrinsic(interleaveInst, nVec, mVec);
+
+                    context.Copy(GetVec(op.Rd), zipped);
+                }
+                else
+                {
+                    Operand zipped = context.AddIntrinsic(X86PunpcklInstruction[op.Size], nVec, mVec);
+                    // Keep one 64-bit half of the interleaved data; the upper lane comes from a zero vector.
+                    Intrinsic halfInst = part == 0
+                        ? Intrinsic.X86Punpcklqdq
+                        : Intrinsic.X86Punpckhqdq;
+
+                    zipped = context.AddIntrinsic(halfInst, zipped, context.VectorZero());
+
+                    context.Copy(GetVec(op.Rd), zipped);
+                }
+            }
+            else
+            {
+                Operand zipped = context.VectorZero();
+
+                int pairCount = op.GetPairsCount() >> op.Size;
+
+                int firstSource = part == 0 ? 0 : pairCount;
+
+                for (int pair = 0; pair < pairCount; pair++)
+                {
+                    int slot = pair << 1;
+
+                    Operand nElem = EmitVectorExtractZx(context, op.Rn, firstSource + pair, op.Size);
+                    Operand mElem = EmitVectorExtractZx(context, op.Rm, firstSource + pair, op.Size);
+
+                    zipped = EmitVectorInsert(context, zipped, nElem, slot,     op.Size);
+                    zipped = EmitVectorInsert(context, zipped, mElem, slot + 1, op.Size);
+                }
+
+                context.Copy(GetVec(op.Rd), zipped);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs
new file mode 100644
index 000000000..1aae491df
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs
@@ -0,0 +1,1057 @@
+// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    using Func2I = Func<Operand, Operand, Operand>;
+
+    static partial class InstEmit
+    {
+#region "Masks"
+        private static readonly long[] _masks_RshrnShrn = new long[] // PSHUFB byte selectors used by Rshrn_V/Shrn_V, indexed by op.Size
+        {
+            14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, // Size 0: source bytes 0, 2, 4, ..., 14
+            13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, // Size 1: source bytes 0-1, 4-5, 8-9, 12-13
+            11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0  // Size 2: source bytes 0-3, 8-11
+        };
+#endregion
+
+        public static void Rshrn_V(ArmEmitterContext context)
+        {
+            if (!Optimizations.UseSsse3)
+            {
+                EmitVectorShrImmNarrowOpZx(context, round: true);
+
+                return;
+            }
+
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            int shift    = GetImmShr(op);
+            int wideSize = op.Size + 1;
+
+            // Rounding constant, added to every source element before the right shift.
+            long roundConst = 1L << (shift - 1);
+
+            Operand dest   = GetVec(op.Rd);
+            Operand source = GetVec(op.Rn);
+
+            // destLow = low 64 bits of Rd with the upper 64 bits taken from a zero vector.
+            Operand destLow = context.AddIntrinsic(Intrinsic.X86Movlhps, dest, context.VectorZero());
+
+            Operand roundVec = null;
+
+            switch (wideSize)
+            {
+                case 1: roundVec = X86GetAllElements(context, (int)roundConst * 0x00010001); break;
+                case 2: roundVec = X86GetAllElements(context, (int)roundConst); break;
+                case 3: roundVec = X86GetAllElements(context,      roundConst); break;
+            }
+
+            Operand result = context.AddIntrinsic(X86PaddInstruction[wideSize], source, roundVec);
+
+            result = context.AddIntrinsic(X86PsrlInstruction[wideSize], result, Const(shift));
+
+            // Byte shuffle that gathers the low half of every shifted element.
+            Operand packMask = X86GetAllElements(context, _masks_RshrnShrn[op.Size]);
+
+            result = context.AddIntrinsic(Intrinsic.X86Pshufb, result, packMask);
+
+            Intrinsic mergeInst = op.RegisterSize == RegisterSize.Simd128
+                ? Intrinsic.X86Movlhps
+                : Intrinsic.X86Movhlps;
+
+            result = context.AddIntrinsic(mergeInst, destLow, result);
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        public static void Shl_S(ArmEmitterContext context)
+        {
+            // Scalar SHL: the element is shifted left by the immediate decoded from the opcode.
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            int shiftAmount = GetImmShl(op);
+            EmitScalarUnaryOpZx(context, element => context.ShiftLeft(element, Const(shiftAmount)));
+        }
+
+        public static void Shl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            int shiftAmount = GetImmShl(op);
+
+            // The packed-shift path is skipped for 8-bit elements (Size 0) and when SSE2 is unavailable.
+            if (!Optimizations.UseSse2 || op.Size <= 0)
+            {
+                EmitVectorUnaryOpZx(context, element => context.ShiftLeft(element, Const(shiftAmount)));
+
+                return;
+            }
+
+            Operand source = GetVec(op.Rn);
+
+            Intrinsic shiftLeftInst = X86PsllInstruction[op.Size];
+            Operand result = context.AddIntrinsic(shiftLeftInst, source, Const(shiftAmount));
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        public static void Shll_V(ArmEmitterContext context)
+        {
+            OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+            int elemBits = 8 << op.Size;
+
+            if (Optimizations.UseSse41)
+            {
+                Operand source = GetVec(op.Rn);
+                // For 128-bit registers the vector is first shifted right by 8 bytes.
+                if (op.RegisterSize == RegisterSize.Simd128)
+                {
+                    source = context.AddIntrinsic(Intrinsic.X86Psrldq, source, Const(8));
+                }
+
+                Intrinsic widenInst = X86PmovsxInstruction[op.Size];
+                Intrinsic shiftInst = X86PsllInstruction[op.Size + 1];
+
+                Operand result = context.AddIntrinsic(widenInst, source);
+
+                // Shift the widened elements left by the source element width in bits.
+                result = context.AddIntrinsic(shiftInst, result, Const(elemBits));
+
+                context.Copy(GetVec(op.Rd), result);
+            }
+            else
+            {
+                EmitVectorShImmWidenBinaryZx(context, (element, amount) => context.ShiftLeft(element, amount), elemBits);
+            }
+        }
+
+        // Vector SHRN: per-element logical right shift by an immediate, packed to half-width elements (no rounding).
+        public static void Shrn_V(ArmEmitterContext context)
+        {
+            if (Optimizations.UseSsse3)
+            {
+                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+                int shift = GetImmShr(op);
+
+                Operand d = GetVec(op.Rd);
+                Operand n = GetVec(op.Rn);
+
+                // dLow = low 64 bits of Rd with the upper 64 bits taken from a zero vector.
+                Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());
+
+                Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];
+
+                Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));
+                // Byte shuffle that gathers the low half of every shifted element.
+                Operand mask = X86GetAllElements(context, _masks_RshrnShrn[op.Size]);
+
+                Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, nShifted, mask);
+
+                Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
+                    ? Intrinsic.X86Movlhps
+                    : Intrinsic.X86Movhlps;
+
+                res = context.AddIntrinsic(movInst, dLow, res);
+
+                context.Copy(GetVec(op.Rd), res);
+            }
+            else
+            {
+                EmitVectorShrImmNarrowOpZx(context, round: false);
+            }
+        }
+
+        public static void Sli_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            Operand result = context.VectorZero();
+
+            int elemCount   = op.GetBytesCount() >> op.Size;
+            int shiftAmount = GetImmShl(op);
+
+            // Selects the low shiftAmount bits of a destination element (empty when the shift is 0).
+            ulong keepMask = shiftAmount == 0 ? 0UL : ulong.MaxValue >> (64 - shiftAmount);
+
+            for (int lane = 0; lane < elemCount; lane++)
+            {
+                // Source element, shifted into place.
+                Operand nElem    = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
+                Operand nShifted = context.ShiftLeft(nElem, Const(shiftAmount));
+
+                // Destination element, reduced to the bits selected by keepMask.
+                Operand dElem = EmitVectorExtractZx(context, op.Rd, lane, op.Size);
+                Operand dKept = context.BitwiseAnd(dElem, Const(keepMask));
+
+                Operand merged = context.BitwiseOr(nShifted, dKept);
+
+                result = EmitVectorInsert(context, result, merged, lane, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        public static void Sqrshl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            // Per-element software fallback; Const(1) fills the delegate's Bool slot (presumably "round" - confirm in SoftFallback).
+            _S64_S64_S64_Bool_S32 shiftFallback = new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlRegSatQ);
+
+            Operand result = context.VectorZero();
+            int elemCount = op.GetBytesCount() >> op.Size;
+
+            for (int lane = 0; lane < elemCount; lane++)
+            {
+                Operand nElem = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
+                Operand mElem = EmitVectorExtractSx(context, op.Rm, lane, op.Size);
+                Operand shifted = context.Call(shiftFallback, nElem, mElem, Const(1), Const(op.Size));
+                result = EmitVectorInsert(context, result, shifted, lane, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar SQRSHRN: forwards to the rounding shift-right-immediate saturating-narrow
+        // emitter with the ScalarSxSx flag combination.
+        public static void Sqrshrn_S(ArmEmitterContext context)
+            => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+
+        // Vector SQRSHRN: forwards to the rounding shift-right-immediate saturating-narrow
+        // emitter with the VectorSxSx flag combination.
+        public static void Sqrshrn_V(ArmEmitterContext context)
+            => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+
+        // Scalar SQRSHRUN: forwards to the rounding shift-right-immediate saturating-narrow
+        // emitter with the ScalarSxZx flag combination.
+        public static void Sqrshrun_S(ArmEmitterContext context)
+            => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+
+        // Vector SQRSHRUN: forwards to the rounding shift-right-immediate saturating-narrow
+        // emitter with the VectorSxZx flag combination.
+        public static void Sqrshrun_V(ArmEmitterContext context)
+            => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+
+        public static void Sqshl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            // Per-element software fallback; Const(0) fills the delegate's Bool slot (presumably "round" - confirm in SoftFallback).
+            _S64_S64_S64_Bool_S32 shiftFallback = new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlRegSatQ);
+
+            Operand result = context.VectorZero();
+            int elemCount = op.GetBytesCount() >> op.Size;
+
+            for (int lane = 0; lane < elemCount; lane++)
+            {
+                Operand nElem = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
+                Operand mElem = EmitVectorExtractSx(context, op.Rm, lane, op.Size);
+                Operand shifted = context.Call(shiftFallback, nElem, mElem, Const(0), Const(op.Size));
+                result = EmitVectorInsert(context, result, shifted, lane, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar SQSHRN: forwards to the shift-right-immediate saturating-narrow emitter
+        // with the ScalarSxSx flag combination.
+        public static void Sqshrn_S(ArmEmitterContext context)
+            => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
+
+        // Vector SQSHRN: forwards to the shift-right-immediate saturating-narrow emitter
+        // with the VectorSxSx flag combination.
+        public static void Sqshrn_V(ArmEmitterContext context)
+            => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
+
+        // Scalar SQSHRUN: forwards to the shift-right-immediate saturating-narrow emitter
+        // with the ScalarSxZx flag combination.
+        public static void Sqshrun_S(ArmEmitterContext context)
+            => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
+
+        // Vector SQSHRUN: forwards to the shift-right-immediate saturating-narrow emitter
+        // with the VectorSxZx flag combination.
+        public static void Sqshrun_V(ArmEmitterContext context)
+            => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+
+        public static void Srshl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            // Per-element software fallback; Const(1) fills the delegate's Bool slot (presumably "round" - confirm in SoftFallback).
+            _S64_S64_S64_Bool_S32 shiftFallback = new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlReg);
+
+            Operand result = context.VectorZero();
+            int elemCount = op.GetBytesCount() >> op.Size;
+
+            for (int lane = 0; lane < elemCount; lane++)
+            {
+                Operand nElem = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
+                Operand mElem = EmitVectorExtractSx(context, op.Rm, lane, op.Size);
+                Operand shifted = context.Call(shiftFallback, nElem, mElem, Const(1), Const(op.Size));
+                result = EmitVectorInsert(context, result, shifted, lane, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar SRSHR: forwards to the sign-extending scalar shift-right-immediate
+        // emitter with the Round flag.
+        public static void Srshr_S(ArmEmitterContext context)
+            => EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
+
+        public static void Srshr_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (!Optimizations.UseSse2 || op.Size <= 0 || op.Size >= 3)
+            {
+                EmitVectorShrImmOpSx(context, ShrImmFlags.Round);
+
+                return;
+            }
+
+            int shift    = GetImmShr(op);
+            int elemBits = 8 << op.Size;
+
+            Operand source = GetVec(op.Rn);
+
+            Intrinsic shlInst = X86PsllInstruction[op.Size];
+            // Isolate bit (shift - 1) of each element: move it to the top bit, then down to bit 0.
+            Operand roundBit = context.AddIntrinsic(shlInst, source, Const(elemBits - shift));
+
+            Intrinsic shrInst = X86PsrlInstruction[op.Size];
+
+            roundBit = context.AddIntrinsic(shrInst, roundBit, Const(elemBits - 1));
+
+            Intrinsic sarInst = X86PsraInstruction[op.Size];
+
+            Operand shifted = context.AddIntrinsic(sarInst, source, Const(shift));
+
+            Intrinsic sumInst = X86PaddInstruction[op.Size];
+            // Result = isolated bit + arithmetically shifted element.
+            Operand result = context.AddIntrinsic(sumInst, roundBit, shifted);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar SRSRA: forwards to the sign-extending scalar shift-right-immediate
+        // emitter with the Round and Accumulate flags.
+        public static void Srsra_S(ArmEmitterContext context)
+            => EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+
+        public static void Srsra_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (!Optimizations.UseSse2 || op.Size <= 0 || op.Size >= 3)
+            {
+                EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+
+                return;
+            }
+
+            int shift    = GetImmShr(op);
+            int elemBits = 8 << op.Size;
+
+            Operand dest   = GetVec(op.Rd);
+            Operand source = GetVec(op.Rn);
+
+            Intrinsic shlInst = X86PsllInstruction[op.Size];
+            // Isolate bit (shift - 1) of each element: move it to the top bit, then down to bit 0.
+            Operand roundBit = context.AddIntrinsic(shlInst, source, Const(elemBits - shift));
+
+            Intrinsic shrInst = X86PsrlInstruction[op.Size];
+
+            roundBit = context.AddIntrinsic(shrInst, roundBit, Const(elemBits - 1));
+
+            Intrinsic sarInst = X86PsraInstruction[op.Size];
+
+            Operand shifted = context.AddIntrinsic(sarInst, source, Const(shift));
+
+            Intrinsic sumInst = X86PaddInstruction[op.Size];
+            // Isolated bit + shifted element, then accumulated onto the destination elements.
+            Operand result = context.AddIntrinsic(sumInst, roundBit, shifted);
+            result = context.AddIntrinsic(sumInst, result, dest);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        public static void Sshl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            // Per-element software fallback; Const(0) fills the delegate's Bool slot (presumably "round" - confirm in SoftFallback).
+            _S64_S64_S64_Bool_S32 shiftFallback = new _S64_S64_S64_Bool_S32(SoftFallback.SignedShlReg);
+
+            Operand result = context.VectorZero();
+            int elemCount = op.GetBytesCount() >> op.Size;
+
+            for (int lane = 0; lane < elemCount; lane++)
+            {
+                Operand nElem = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
+                Operand mElem = EmitVectorExtractSx(context, op.Rm, lane, op.Size);
+                Operand shifted = context.Call(shiftFallback, nElem, mElem, Const(0), Const(op.Size));
+                result = EmitVectorInsert(context, result, shifted, lane, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        public static void Sshll_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            int shiftAmount = GetImmShl(op);
+
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorShImmWidenBinarySx(context, (element, amount) => context.ShiftLeft(element, amount), shiftAmount);
+
+                return;
+            }
+
+            Operand source = GetVec(op.Rn);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                source = context.AddIntrinsic(Intrinsic.X86Psrldq, source, Const(8));
+            }
+
+            // Widen with the PMOVSX* variant for this element size; the shift is skipped when it is 0.
+            Intrinsic widenInst = X86PmovsxInstruction[op.Size];
+            Operand result = context.AddIntrinsic(widenInst, source);
+
+            if (shiftAmount != 0)
+            {
+                Intrinsic shiftLeftInst = X86PsllInstruction[op.Size + 1];
+
+                result = context.AddIntrinsic(shiftLeftInst, result, Const(shiftAmount));
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar SSHR: forwards to the shared shift-right-immediate emitter
+        // with the ScalarSx flag combination.
+        public static void Sshr_S(ArmEmitterContext context)
+            => EmitShrImmOp(context, ShrImmFlags.ScalarSx);
+
+        public static void Sshr_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (!Optimizations.UseSse2 || op.Size <= 0 || op.Size >= 3)
+            {
+                EmitShrImmOp(context, ShrImmFlags.VectorSx);
+
+                return;
+            }
+
+            int shiftAmount = GetImmShr(op);
+
+            Operand source = GetVec(op.Rn);
+
+            // Packed arithmetic right shift (PSRA* variant for this element size).
+            Intrinsic shiftInst = X86PsraInstruction[op.Size];
+            Operand result = context.AddIntrinsic(shiftInst, source, Const(shiftAmount));
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar SSRA: forwards to the sign-extending scalar shift-right-immediate
+        // emitter with the Accumulate flag.
+        public static void Ssra_S(ArmEmitterContext context)
+            => EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
+
+        public static void Ssra_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (!Optimizations.UseSse2 || op.Size <= 0 || op.Size >= 3)
+            {
+                EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);
+
+                return;
+            }
+
+            int shiftAmount = GetImmShr(op);
+
+            Operand dest   = GetVec(op.Rd);
+            Operand source = GetVec(op.Rn);
+
+            Intrinsic shiftInst = X86PsraInstruction[op.Size];
+            Intrinsic sumInst   = X86PaddInstruction[op.Size];
+
+            Operand result = context.AddIntrinsic(shiftInst, source, Const(shiftAmount));
+
+            // Accumulate the shifted elements onto the current destination elements.
+            result = context.AddIntrinsic(sumInst, result, dest);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(dest, result);
+        }
+
+        public static void Uqrshl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            // Per-element software fallback; Const(1) fills the delegate's Bool slot (presumably "round" - confirm in SoftFallback).
+            _U64_U64_U64_Bool_S32 shiftFallback = new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlRegSatQ);
+
+            Operand result = context.VectorZero();
+            int elemCount = op.GetBytesCount() >> op.Size;
+
+            for (int lane = 0; lane < elemCount; lane++)
+            {
+                Operand nElem = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
+                Operand mElem = EmitVectorExtractZx(context, op.Rm, lane, op.Size);
+                Operand shifted = context.Call(shiftFallback, nElem, mElem, Const(1), Const(op.Size));
+                result = EmitVectorInsert(context, result, shifted, lane, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar UQRSHRN: forwards to the rounding shift-right-immediate saturating-narrow
+        // emitter with the ScalarZxZx flag combination.
+        public static void Uqrshrn_S(ArmEmitterContext context)
+            => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+
+        // Vector UQRSHRN: forwards to the rounding shift-right-immediate saturating-narrow
+        // emitter with the VectorZxZx flag combination.
+        public static void Uqrshrn_V(ArmEmitterContext context)
+            => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+
+        public static void Uqshl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            // Per-element software fallback; Const(0) fills the delegate's Bool slot (presumably "round" - confirm in SoftFallback).
+            _U64_U64_U64_Bool_S32 shiftFallback = new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlRegSatQ);
+
+            Operand result = context.VectorZero();
+            int elemCount = op.GetBytesCount() >> op.Size;
+
+            for (int lane = 0; lane < elemCount; lane++)
+            {
+                Operand nElem = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
+                Operand mElem = EmitVectorExtractZx(context, op.Rm, lane, op.Size);
+                Operand shifted = context.Call(shiftFallback, nElem, mElem, Const(0), Const(op.Size));
+                result = EmitVectorInsert(context, result, shifted, lane, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar UQSHRN: forwards to the shift-right-immediate saturating-narrow emitter
+        // with the ScalarZxZx flag combination.
+        public static void Uqshrn_S(ArmEmitterContext context)
+            => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
+
+        // Vector UQSHRN: forwards to the shift-right-immediate saturating-narrow emitter
+        // with the VectorZxZx flag combination.
+        public static void Uqshrn_V(ArmEmitterContext context)
+            => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
+
+        public static void Urshl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            // Per-element software fallback; Const(1) fills the delegate's Bool slot (presumably "round" - confirm in SoftFallback).
+            _U64_U64_U64_Bool_S32 shiftFallback = new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlReg);
+
+            Operand result = context.VectorZero();
+            int elemCount = op.GetBytesCount() >> op.Size;
+
+            for (int lane = 0; lane < elemCount; lane++)
+            {
+                Operand nElem = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
+                Operand mElem = EmitVectorExtractZx(context, op.Rm, lane, op.Size);
+                Operand shifted = context.Call(shiftFallback, nElem, mElem, Const(1), Const(op.Size));
+                result = EmitVectorInsert(context, result, shifted, lane, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar URSHR: forwards to the zero-extending scalar shift-right-immediate
+        // emitter with the Round flag.
+        public static void Urshr_S(ArmEmitterContext context)
+            => EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
+
+        public static void Urshr_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (!Optimizations.UseSse2 || op.Size <= 0)
+            {
+                EmitVectorShrImmOpZx(context, ShrImmFlags.Round);
+
+                return;
+            }
+
+            int shift    = GetImmShr(op);
+            int elemBits = 8 << op.Size;
+
+            Operand source = GetVec(op.Rn);
+
+            Intrinsic shlInst = X86PsllInstruction[op.Size];
+            // Isolate bit (shift - 1) of each element: move it to the top bit, then down to bit 0.
+            Operand roundBit = context.AddIntrinsic(shlInst, source, Const(elemBits - shift));
+
+            Intrinsic shrInst = X86PsrlInstruction[op.Size];
+
+            roundBit = context.AddIntrinsic(shrInst, roundBit, Const(elemBits - 1));
+
+            Operand shifted = context.AddIntrinsic(shrInst, source, Const(shift));
+
+            Intrinsic sumInst = X86PaddInstruction[op.Size];
+            // Result = isolated bit + logically shifted element.
+            Operand result = context.AddIntrinsic(sumInst, roundBit, shifted);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar URSRA: forwards to the zero-extending scalar shift-right-immediate
+        // emitter with the Round and Accumulate flags.
+        public static void Ursra_S(ArmEmitterContext context)
+            => EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+
+        public static void Ursra_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (!Optimizations.UseSse2 || op.Size <= 0)
+            {
+                EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+
+                return;
+            }
+
+            int shift    = GetImmShr(op);
+            int elemBits = 8 << op.Size;
+
+            Operand dest   = GetVec(op.Rd);
+            Operand source = GetVec(op.Rn);
+
+            Intrinsic shlInst = X86PsllInstruction[op.Size];
+            // Isolate bit (shift - 1) of each element: move it to the top bit, then down to bit 0.
+            Operand roundBit = context.AddIntrinsic(shlInst, source, Const(elemBits - shift));
+
+            Intrinsic shrInst = X86PsrlInstruction[op.Size];
+
+            roundBit = context.AddIntrinsic(shrInst, roundBit, Const(elemBits - 1));
+
+            Operand shifted = context.AddIntrinsic(shrInst, source, Const(shift));
+
+            Intrinsic sumInst = X86PaddInstruction[op.Size];
+            // Isolated bit + shifted element, then accumulated onto the destination elements.
+            Operand result = context.AddIntrinsic(sumInst, roundBit, shifted);
+            result = context.AddIntrinsic(sumInst, result, dest);
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        public static void Ushl_V(ArmEmitterContext context)
+        {
+            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+            // Per-element software fallback; Const(0) fills the delegate's Bool slot (presumably "round" - confirm in SoftFallback).
+            _U64_U64_U64_Bool_S32 shiftFallback = new _U64_U64_U64_Bool_S32(SoftFallback.UnsignedShlReg);
+
+            Operand result = context.VectorZero();
+            int elemCount = op.GetBytesCount() >> op.Size;
+
+            for (int lane = 0; lane < elemCount; lane++)
+            {
+                Operand nElem = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
+                Operand mElem = EmitVectorExtractZx(context, op.Rm, lane, op.Size);
+                Operand shifted = context.Call(shiftFallback, nElem, mElem, Const(0), Const(op.Size));
+                result = EmitVectorInsert(context, result, shifted, lane, op.Size);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        public static void Ushll_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            int shiftAmount = GetImmShl(op);
+
+            if (!Optimizations.UseSse41)
+            {
+                EmitVectorShImmWidenBinaryZx(context, (element, amount) => context.ShiftLeft(element, amount), shiftAmount);
+
+                return;
+            }
+
+            Operand source = GetVec(op.Rn);
+
+            if (op.RegisterSize == RegisterSize.Simd128)
+            {
+                source = context.AddIntrinsic(Intrinsic.X86Psrldq, source, Const(8));
+            }
+
+            // Widen with the PMOVZX* variant for this element size; the shift is skipped when it is 0.
+            Intrinsic widenInst = X86PmovzxInstruction[op.Size];
+            Operand result = context.AddIntrinsic(widenInst, source);
+
+            if (shiftAmount != 0)
+            {
+                Intrinsic shiftLeftInst = X86PsllInstruction[op.Size + 1];
+
+                result = context.AddIntrinsic(shiftLeftInst, result, Const(shiftAmount));
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar USHR: forwards to the shared shift-right-immediate emitter
+        // with the ScalarZx flag combination.
+        public static void Ushr_S(ArmEmitterContext context)
+            => EmitShrImmOp(context, ShrImmFlags.ScalarZx);
+
+        public static void Ushr_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
+
+            if (!Optimizations.UseSse2 || op.Size <= 0)
+            {
+                EmitShrImmOp(context, ShrImmFlags.VectorZx);
+
+                return;
+            }
+
+            int shiftAmount = GetImmShr(op);
+
+            Operand source = GetVec(op.Rn);
+
+            // Packed logical right shift (PSRL* variant for this element size).
+            Intrinsic shiftInst = X86PsrlInstruction[op.Size];
+            Operand result = context.AddIntrinsic(shiftInst, source, Const(shiftAmount));
+
+            if (op.RegisterSize == RegisterSize.Simd64)
+            {
+                result = context.VectorZeroUpper64(result);
+            }
+
+            context.Copy(GetVec(op.Rd), result);
+        }
+
+        // Scalar unsigned shift right by immediate and accumulate: the scalar,
+        // zero-extending emitter is invoked with the Accumulate flag.
+        public static void Usra_S(ArmEmitterContext context)
+            => EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
+
+        // Vector unsigned shift right by immediate, accumulating into Rd.
+        public static void Usra_V(ArmEmitterContext context)
+        {
+            OpCodeSimdShImm opCode = (OpCodeSimdShImm)context.CurrOp;
+
+            // The SSE2 path is not taken for Size 0 elements; those go through the generic emitter.
+            bool fastPath = Optimizations.UseSse2 && opCode.Size > 0;
+
+            if (!fastPath)
+            {
+                EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);
+
+                return;
+            }
+
+            int shiftAmount = GetImmShr(opCode);
+            Operand dest   = GetVec(opCode.Rd);
+            Operand source = GetVec(opCode.Rn);
+
+            // Shift Rn right, then add the current contents of Rd element-wise.
+            Operand sum = context.AddIntrinsic(X86PsrlInstruction[opCode.Size], source, Const(shiftAmount));
+
+            sum = context.AddIntrinsic(X86PaddInstruction[opCode.Size], sum, dest);
+
+            if (opCode.RegisterSize == RegisterSize.Simd64)
+            {
+                sum = context.VectorZeroUpper64(sum);
+            }
+
+            context.Copy(dest, sum);
+        }
+
+        [Flags]
+        private enum ShrImmFlags
+        {
+            Scalar     = 0x1, // Process a single element instead of the whole vector.
+            Signed     = 0x2, // Extract elements as signed and use the signed shift.
+            // Optional steps applied around the shift by EmitShrImmOp.
+            Round      = 0x4,
+            Accumulate = 0x8,
+            // Presets for the scalar forms: signed (Sx) and unsigned (Zx).
+            ScalarSx = Scalar | Signed,
+            ScalarZx = Scalar,
+            // Presets for the vector forms; they leave the Scalar bit clear.
+            VectorSx = Signed,
+            VectorZx = 0
+        }
+
+        // Scalar, signed variant of the shift-right-by-immediate emitter:
+        // the ScalarSx preset is OR-ed into the flags supplied by the caller.
+        private static void EmitScalarShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
+            => EmitShrImmOp(context, flags | ShrImmFlags.ScalarSx);
+
+        // Scalar, unsigned variant of the shift-right-by-immediate emitter:
+        // the ScalarZx preset is OR-ed into the flags supplied by the caller.
+        private static void EmitScalarShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
+            => EmitShrImmOp(context, flags | ShrImmFlags.ScalarZx);
+
+        // Vector, signed variant of the shift-right-by-immediate emitter:
+        // the VectorSx preset is OR-ed into the flags supplied by the caller.
+        private static void EmitVectorShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags)
+            => EmitShrImmOp(context, flags | ShrImmFlags.VectorSx);
+
+        // Vector, unsigned variant of the shift-right-by-immediate emitter:
+        // VectorZx is 0, so the caller's flags reach EmitShrImmOp unchanged.
+        private static void EmitVectorShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags)
+            => EmitShrImmOp(context, flags | ShrImmFlags.VectorZx);
+
+        // Shared emitter for the shift-right-by-immediate family. The flags select scalar or vector
+        // operation, signed or unsigned extraction, rounding, and accumulation into the destination.
+        private static void EmitShrImmOp(ArmEmitterContext context, ShrImmFlags flags)
+        {
+            OpCodeSimdShImm opCode = (OpCodeSimdShImm)context.CurrOp;
+            Operand result = context.VectorZero();
+
+            bool isScalar     = (flags & ShrImmFlags.Scalar)     == ShrImmFlags.Scalar;
+            bool isSigned     = (flags & ShrImmFlags.Signed)     == ShrImmFlags.Signed;
+            bool doRound      = (flags & ShrImmFlags.Round)      == ShrImmFlags.Round;
+            bool doAccumulate = (flags & ShrImmFlags.Accumulate) == ShrImmFlags.Accumulate;
+
+            int shiftAmount = GetImmShr(opCode);
+
+            // Value of the highest bit that gets shifted out; adding it before the shift rounds the result.
+            long rounding = 1L << (shiftAmount - 1);
+            int laneCount = isScalar ? 1 : opCode.GetBytesCount() >> opCode.Size;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Operand element = EmitVectorExtract(context, opCode.Rn, lane, opCode.Size, isSigned);
+
+                if (opCode.Size > 2)
+                {
+                    // Size 3 elements go through the EmitShrImm64 helper call instead of inline IR.
+                    element = EmitShrImm64(context, element, isSigned, doRound ? rounding : 0L, shiftAmount);
+                }
+                else
+                {
+                    if (doRound)
+                    {
+                        element = context.Add(element, Const(rounding));
+                    }
+
+                    element = isSigned ? context.ShiftRightSI(element, Const(shiftAmount)) : context.ShiftRightUI(element, Const(shiftAmount));
+                }
+
+                if (doAccumulate)
+                {
+                    // Add the element currently held in Rd at the same index.
+                    element = context.Add(element, EmitVectorExtract(context, opCode.Rd, lane, opCode.Size, isSigned));
+                }
+
+                result = EmitVectorInsert(context, result, element, lane, opCode.Size);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // Narrowing unsigned shift right by immediate: elements of width Size + 1 are read from Rn,
+        // optionally rounded, shifted, and inserted at width Size into Rd.
+        private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round)
+        {
+            OpCodeSimdShImm opCode = (OpCodeSimdShImm)context.CurrOp;
+
+            int shiftAmount = GetImmShr(opCode);
+            long rounding   = 1L << (shiftAmount - 1);
+            int laneCount   = 8 >> opCode.Size;
+
+            // The 128-bit form writes the second half of Rd on top of its current contents.
+            int firstLane = opCode.RegisterSize == RegisterSize.Simd128 ? laneCount : 0;
+
+            Operand result = firstLane == 0 ? context.VectorZero() : context.Copy(GetVec(opCode.Rd));
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Operand element = EmitVectorExtractZx(context, opCode.Rn, lane, opCode.Size + 1);
+
+                if (round)
+                {
+                    element = context.Add(element, Const(rounding));
+                }
+
+                element = context.ShiftRightUI(element, Const(shiftAmount));
+                result  = EmitVectorInsert(context, result, element, firstLane + lane, opCode.Size);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        [Flags]
+        private enum ShrImmSaturatingNarrowFlags
+        {
+            Scalar    = 0x1, // Process a single element instead of the whole vector.
+            SignedSrc = 0x2, // Source elements are extracted and shifted as signed.
+            SignedDst = 0x4, // Passed on to EmitSatQ as the destination signedness.
+            // Adds the rounding constant before the shift.
+            Round = 0x8,
+            // Scalar presets, named <source><destination> with Sx = signed and Zx = unsigned.
+            ScalarSxSx = Scalar | SignedSrc | SignedDst,
+            ScalarSxZx = Scalar | SignedSrc,
+            ScalarZxZx = Scalar,
+            // Vector presets; they leave the Scalar bit clear.
+            VectorSxSx = SignedSrc | SignedDst,
+            VectorSxZx = SignedSrc,
+            VectorZxZx = 0
+        }
+
+        // Rounding flavor of the saturating narrowing shift:
+        // the Round bit is OR-ed into the flags supplied by the caller.
+        private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+            => EmitShrImmSaturatingNarrowOp(context, flags | ShrImmSaturatingNarrowFlags.Round);
+
+        // Saturating, narrowing shift right by immediate. Elements of width Size + 1 are read from Rn,
+        // shifted (optionally rounded), saturated by EmitSatQ, and inserted at width Size into Rd.
+        private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+        {
+            OpCodeSimdShImm opCode = (OpCodeSimdShImm)context.CurrOp;
+
+            bool isScalar  = (flags & ShrImmSaturatingNarrowFlags.Scalar)    == ShrImmSaturatingNarrowFlags.Scalar;
+            bool srcSigned = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) == ShrImmSaturatingNarrowFlags.SignedSrc;
+            bool dstSigned = (flags & ShrImmSaturatingNarrowFlags.SignedDst) == ShrImmSaturatingNarrowFlags.SignedDst;
+            bool doRound   = (flags & ShrImmSaturatingNarrowFlags.Round)     == ShrImmSaturatingNarrowFlags.Round;
+
+            int shiftAmount = GetImmShr(opCode);
+            long rounding   = 1L << (shiftAmount - 1);
+            int laneCount   = isScalar ? 1 : 8 >> opCode.Size;
+
+            // Only the non-scalar 128-bit form writes the second half of Rd, on top of its current contents.
+            int firstLane = !isScalar && (opCode.RegisterSize == RegisterSize.Simd128) ? laneCount : 0;
+            Operand result = firstLane == 0 ? context.VectorZero() : context.Copy(GetVec(opCode.Rd));
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Operand element = EmitVectorExtract(context, opCode.Rn, lane, opCode.Size + 1, srcSigned);
+
+                if (doRound && opCode.Size > 1)
+                {
+                    // Rounding with Size above 1 goes through the EmitShrImm64 helper call.
+                    element = EmitShrImm64(context, element, srcSigned, rounding, shiftAmount); // shift <= 32
+                }
+                else
+                {
+                    if (doRound)
+                    {
+                        element = context.Add(element, Const(rounding));
+                    }
+
+                    element = srcSigned
+                        ? context.ShiftRightSI(element, Const(shiftAmount))
+                        : context.ShiftRightUI(element, Const(shiftAmount));
+                }
+
+                element = EmitSatQ(context, element, opCode.Size, srcSigned, dstSigned);
+                result  = EmitVectorInsert(context, result, element, firstLane + lane, opCode.Size);
+            }
+
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+
+        // dst64 = (Int(src64, signed) + roundConst) >> shift; emitted as a call to a SoftFallback routine.
+        private static Operand EmitShrImm64(ArmEmitterContext context, Operand value, bool signed, long roundConst, int shift)
+        {
+            Delegate helper; // Signed or unsigned routine, chosen by the "signed" argument.
+            if (signed)
+            {
+                helper = new _S64_S64_S64_S32(SoftFallback.SignedShrImm64);
+            }
+            else
+            {
+                helper = new _U64_U64_S64_S32(SoftFallback.UnsignedShrImm64);
+            }
+            return context.Call(helper, value, Const(roundConst), Const(shift));
+        }
+
+        // Signed flavor of the widening operate-with-immediate emitter:
+        // source elements are extracted with signed set to true.
+        private static void EmitVectorShImmWidenBinarySx(ArmEmitterContext context, Func2I emit, int imm)
+            => EmitVectorShImmWidenBinaryOp(context, emit, imm, true);
+
+        // Unsigned flavor of the widening operate-with-immediate emitter:
+        // source elements are extracted with signed set to false.
+        private static void EmitVectorShImmWidenBinaryZx(ArmEmitterContext context, Func2I emit, int imm)
+            => EmitVectorShImmWidenBinaryOp(context, emit, imm, false);
+
+        // Widening element-wise operation with an immediate: reads Size-wide elements of Rn, applies
+        // emit(element, imm) and stores each result at width Size + 1 in Rd.
+        private static void EmitVectorShImmWidenBinaryOp(ArmEmitterContext context, Func2I emit, int imm, bool signed)
+        {
+            OpCodeSimd opCode = (OpCodeSimd)context.CurrOp;
+            Operand result = context.VectorZero();
+
+            int laneCount = 8 >> opCode.Size;
+            // The 128-bit form takes its inputs from the second half of Rn.
+            int firstLane = opCode.RegisterSize == RegisterSize.Simd128 ? laneCount : 0;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Operand element = EmitVectorExtract(context, opCode.Rn, firstLane + lane, opCode.Size, signed);
+                Operand widened = emit(element, Const(imm));
+                result = EmitVectorInsert(context, result, widened, lane, opCode.Size + 1);
+            }
+            context.Copy(GetVec(opCode.Rd), result);
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSystem.cs b/ARMeilleure/Instructions/InstEmitSystem.cs
new file mode 100644
index 000000000..eeb53c1fe
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSystem.cs
@@ -0,0 +1,114 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit
+    {
+        private const int DczSizeLog2 = 4; // Sys zeroes 4 << DczSizeLog2 (= 64) bytes for DC ZVA.
+
+        public static void Hint(ArmEmitterContext context)
+        {
+            // Intentionally empty: no IR is emitted, so the instruction executes as a no-op.
+        }
+
+        public static void Isb(ArmEmitterContext context)
+        {
+            // Intentionally empty: no IR is emitted, so the instruction executes as a no-op.
+        }
+
+        // System register read: the packed register id selects a NativeInterface getter, and the
+        // value returned by the emitted call is written to Rt. Unlisted registers throw at translation time.
+        public static void Mrs(ArmEmitterContext context)
+        {
+            OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+            int registerId = GetPackedId(op);
+            Delegate getter;
+            switch (registerId)
+            {
+                case 0b11_011_0000_0000_001: getter = new _U64(NativeInterface.GetCtrEl0);    break;
+                case 0b11_011_0000_0000_111: getter = new _U64(NativeInterface.GetDczidEl0);  break;
+                case 0b11_011_0100_0100_000: getter = new _U64(NativeInterface.GetFpcr);      break;
+                case 0b11_011_0100_0100_001: getter = new _U64(NativeInterface.GetFpsr);      break;
+                case 0b11_011_1101_0000_010: getter = new _U64(NativeInterface.GetTpidrEl0);  break;
+                case 0b11_011_1101_0000_011: getter = new _U64(NativeInterface.GetTpidr);     break;
+                case 0b11_011_1110_0000_000: getter = new _U64(NativeInterface.GetCntfrqEl0); break;
+                case 0b11_011_1110_0000_001: getter = new _U64(NativeInterface.GetCntpctEl0); break;
+                default: throw new NotImplementedException($"Unknown MRS 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+            }
+
+            SetIntOrZR(context, op.Rt, context.Call(getter));
+        }
+
+        // System register write: the packed register id selects a NativeInterface setter, which is
+        // called with the value of Rt. Unlisted registers throw at translation time.
+        public static void Msr(ArmEmitterContext context)
+        {
+            OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+            Delegate setter;
+
+            switch (GetPackedId(op))
+            {
+                case 0b11_011_0100_0100_000: setter = new _Void_U64(NativeInterface.SetFpcr);     break;
+                case 0b11_011_0100_0100_001: setter = new _Void_U64(NativeInterface.SetFpsr);     break;
+                case 0b11_011_1101_0000_010: setter = new _Void_U64(NativeInterface.SetTpidrEl0); break;
+                default: throw new NotImplementedException($"Unknown MSR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+            }
+
+            context.Call(setter, GetIntOrZR(context, op.Rt));
+        }
+
+        public static void Nop(ArmEmitterContext context)
+        {
+            // Intentionally empty: nothing is emitted for this instruction.
+        }
+
+        public static void Sys(ArmEmitterContext context)
+        {
+            // SYS performs maintenance operations (cache invalidation, address translation and the like).
+            // No cache is emulated, so everything except DC ZVA is treated as a no-op.
+            OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+            switch (GetPackedId(op))
+            {
+                case 0b11_011_0111_0100_001:
+                {
+                    // DC ZVA zeroes the naturally aligned block that contains the address in Rt, so the
+                    // address is rounded down to the block size first instead of being used as the start.
+                    const long blockSize = 4L << DczSizeLog2;
+                    Operand t = context.BitwiseAnd(GetIntOrZR(context, op.Rt), Const(~(blockSize - 1)));
+
+                    for (long offset = 0; offset < blockSize; offset += 8)
+                    {
+                        Operand address = context.Add(t, Const(offset));
+                        context.Call(new _Void_U64_U64(NativeInterface.WriteUInt64), address, Const(0L));
+                    }
+
+                    break;
+                }
+
+                // No-op
+                case 0b11_011_0111_1110_001: // DC CIVAC
+                    break;
+            }
+        }
+
+        // Packs the system register encoding fields of the opcode into a single integer:
+        // Op2 starts at bit 0, CRm at bit 3, CRn at bit 7, Op1 at bit 11 and Op0 at bit 14.
+        // The binary case labels in Mrs, Msr and Sys are written in this layout
+        // (Op0_Op1_CRn_CRm_Op2, most significant field first).
+        private static int GetPackedId(OpCodeSystem op)
+        {
+            int low  = op.Op2 | (op.CRm << 3);
+            int mid  = op.CRn << 7;
+            int high = (op.Op1 << 11) | (op.Op0 << 14);
+
+            return low | mid | high;
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
new file mode 100644
index 000000000..e70ca34bc
--- /dev/null
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -0,0 +1,459 @@
+namespace ARMeilleure.Instructions
+{
+    enum InstName // Instruction identifiers; no explicit values, so each member's value follows declaration order.
+    {
+        // Base (AArch64) instructions.
+        Adc,
+        Adcs,
+        Add,
+        Adds,
+        Adr,
+        Adrp,
+        And,
+        Ands,
+        Asrv,
+        B,
+        B_Cond,
+        Bfm,
+        Bic,
+        Bics,
+        Bl,
+        Blr,
+        Br,
+        Brk,
+        Cbnz,
+        Cbz,
+        Ccmn,
+        Ccmp,
+        Clrex,
+        Cls,
+        Clz,
+        Crc32b,
+        Crc32h,
+        Crc32w,
+        Crc32x,
+        Crc32cb,
+        Crc32ch,
+        Crc32cw,
+        Crc32cx,
+        Csel,
+        Csinc,
+        Csinv,
+        Csneg,
+        Dmb,
+        Dsb,
+        Eon,
+        Eor,
+        Extr,
+        Hint,
+        Isb,
+        Ldar,
+        Ldaxp,
+        Ldaxr,
+        Ldp,
+        Ldr,
+        Ldr_Literal,
+        Ldrs,
+        Ldxr,
+        Ldxp,
+        Lslv,
+        Lsrv,
+        Madd,
+        Movk,
+        Movn,
+        Movz,
+        Mrs,
+        Msr,
+        Msub,
+        Nop,
+        Orn,
+        Orr,
+        Pfrm, // NOTE(review): presumably PRFM (prefetch memory); spelling kept, other files may reference it - confirm.
+        Rbit,
+        Ret,
+        Rev16,
+        Rev32,
+        Rev64,
+        Rorv,
+        Sbc,
+        Sbcs,
+        Sbfm,
+        Sdiv,
+        Smaddl,
+        Smsubl,
+        Smulh,
+        Stlr,
+        Stlxp,
+        Stlxr,
+        Stp,
+        Str,
+        Stxp,
+        Stxr,
+        Sub,
+        Subs,
+        Svc,
+        Sys,
+        Tbnz,
+        Tbz,
+        Ubfm,
+        Udiv,
+        Umaddl,
+        Umsubl,
+        Umulh,
+        Und,
+
+        // FP & SIMD (AArch64) instructions; suffixes such as _S, _V and _Gp separate variants of one mnemonic.
+        Abs_S,
+        Abs_V,
+        Add_S,
+        Add_V,
+        Addhn_V,
+        Addp_S,
+        Addp_V,
+        Addv_V,
+        Aesd_V,
+        Aese_V,
+        Aesimc_V,
+        Aesmc_V,
+        And_V,
+        Bic_V,
+        Bic_Vi,
+        Bif_V,
+        Bit_V,
+        Bsl_V,
+        Cls_V,
+        Clz_V,
+        Cmeq_S,
+        Cmeq_V,
+        Cmge_S,
+        Cmge_V,
+        Cmgt_S,
+        Cmgt_V,
+        Cmhi_S,
+        Cmhi_V,
+        Cmhs_S,
+        Cmhs_V,
+        Cmle_S,
+        Cmle_V,
+        Cmlt_S,
+        Cmlt_V,
+        Cmtst_S,
+        Cmtst_V,
+        Cnt_V,
+        Dup_Gp,
+        Dup_S,
+        Dup_V,
+        Eor_V,
+        Ext_V,
+        Fabd_S,
+        Fabd_V,
+        Fabs_S,
+        Fabs_V,
+        Fadd_S,
+        Fadd_V,
+        Faddp_S,
+        Faddp_V,
+        Fccmp_S,
+        Fccmpe_S,
+        Fcmeq_S,
+        Fcmeq_V,
+        Fcmge_S,
+        Fcmge_V,
+        Fcmgt_S,
+        Fcmgt_V,
+        Fcmle_S,
+        Fcmle_V,
+        Fcmlt_S,
+        Fcmlt_V,
+        Fcmp_S,
+        Fcmpe_S,
+        Fcsel_S,
+        Fcvt_S,
+        Fcvtas_Gp,
+        Fcvtau_Gp,
+        Fcvtl_V,
+        Fcvtms_Gp,
+        Fcvtmu_Gp,
+        Fcvtn_V,
+        Fcvtns_S,
+        Fcvtns_V,
+        Fcvtnu_S,
+        Fcvtnu_V,
+        Fcvtps_Gp,
+        Fcvtpu_Gp,
+        Fcvtzs_Gp,
+        Fcvtzs_Gp_Fixed,
+        Fcvtzs_S,
+        Fcvtzs_V,
+        Fcvtzs_V_Fixed,
+        Fcvtzu_Gp,
+        Fcvtzu_Gp_Fixed,
+        Fcvtzu_S,
+        Fcvtzu_V,
+        Fcvtzu_V_Fixed,
+        Fdiv_S,
+        Fdiv_V,
+        Fmadd_S,
+        Fmax_S,
+        Fmax_V,
+        Fmaxnm_S,
+        Fmaxnm_V,
+        Fmaxp_V,
+        Fmin_S,
+        Fmin_V,
+        Fminnm_S,
+        Fminnm_V,
+        Fminp_V,
+        Fmla_Se,
+        Fmla_V,
+        Fmla_Ve,
+        Fmls_Se,
+        Fmls_V,
+        Fmls_Ve,
+        Fmov_S,
+        Fmov_Si,
+        Fmov_Vi,
+        Fmov_Ftoi,
+        Fmov_Itof,
+        Fmov_Ftoi1,
+        Fmov_Itof1,
+        Fmsub_S,
+        Fmul_S,
+        Fmul_Se,
+        Fmul_V,
+        Fmul_Ve,
+        Fmulx_S,
+        Fmulx_Se,
+        Fmulx_V,
+        Fmulx_Ve,
+        Fneg_S,
+        Fneg_V,
+        Fnmadd_S,
+        Fnmsub_S,
+        Fnmul_S,
+        Frecpe_S,
+        Frecpe_V,
+        Frecps_S,
+        Frecps_V,
+        Frecpx_S,
+        Frinta_S,
+        Frinta_V,
+        Frinti_S,
+        Frinti_V,
+        Frintm_S,
+        Frintm_V,
+        Frintn_S,
+        Frintn_V,
+        Frintp_S,
+        Frintp_V,
+        Frintx_S,
+        Frintx_V,
+        Frintz_S,
+        Frintz_V,
+        Frsqrte_S,
+        Frsqrte_V,
+        Frsqrts_S,
+        Frsqrts_V,
+        Fsqrt_S,
+        Fsqrt_V,
+        Fsub_S,
+        Fsub_V,
+        Ins_Gp,
+        Ins_V,
+        Ld__Vms,
+        Ld__Vss,
+        Mla_V,
+        Mla_Ve,
+        Mls_V,
+        Mls_Ve,
+        Movi_V,
+        Mul_V,
+        Mul_Ve,
+        Mvni_V,
+        Neg_S,
+        Neg_V,
+        Not_V,
+        Orn_V,
+        Orr_V,
+        Orr_Vi,
+        Raddhn_V,
+        Rbit_V,
+        Rev16_V,
+        Rev32_V,
+        Rev64_V,
+        Rshrn_V,
+        Rsubhn_V,
+        Saba_V,
+        Sabal_V,
+        Sabd_V,
+        Sabdl_V,
+        Sadalp_V,
+        Saddl_V,
+        Saddlp_V,
+        Saddlv_V,
+        Saddw_V,
+        Scvtf_Gp,
+        Scvtf_Gp_Fixed,
+        Scvtf_S,
+        Scvtf_V,
+        Scvtf_V_Fixed,
+        Sha1c_V,
+        Sha1h_V,
+        Sha1m_V,
+        Sha1p_V,
+        Sha1su0_V,
+        Sha1su1_V,
+        Sha256h_V,
+        Sha256h2_V,
+        Sha256su0_V,
+        Sha256su1_V,
+        Shadd_V,
+        Shl_S,
+        Shl_V,
+        Shll_V,
+        Shrn_V,
+        Shsub_V,
+        Sli_V,
+        Smax_V,
+        Smaxp_V,
+        Smaxv_V,
+        Smin_V,
+        Sminp_V,
+        Sminv_V,
+        Smlal_V,
+        Smlal_Ve,
+        Smlsl_V,
+        Smlsl_Ve,
+        Smov_S,
+        Smull_V,
+        Smull_Ve,
+        Sqabs_S,
+        Sqabs_V,
+        Sqadd_S,
+        Sqadd_V,
+        Sqdmulh_S,
+        Sqdmulh_V,
+        Sqneg_S,
+        Sqneg_V,
+        Sqrdmulh_S,
+        Sqrdmulh_V,
+        Sqrshl_V,
+        Sqrshrn_S,
+        Sqrshrn_V,
+        Sqrshrun_S,
+        Sqrshrun_V,
+        Sqshl_V,
+        Sqshrn_S,
+        Sqshrn_V,
+        Sqshrun_S,
+        Sqshrun_V,
+        Sqsub_S,
+        Sqsub_V,
+        Sqxtn_S,
+        Sqxtn_V,
+        Sqxtun_S,
+        Sqxtun_V,
+        Srhadd_V,
+        Srshl_V,
+        Srshr_S,
+        Srshr_V,
+        Srsra_S,
+        Srsra_V,
+        Sshl_V,
+        Sshll_V,
+        Sshr_S,
+        Sshr_V,
+        Ssra_S,
+        Ssra_V,
+        Ssubl_V,
+        Ssubw_V,
+        St__Vms,
+        St__Vss,
+        Sub_S,
+        Sub_V,
+        Subhn_V,
+        Suqadd_S,
+        Suqadd_V,
+        Tbl_V,
+        Trn1_V,
+        Trn2_V,
+        Uaba_V,
+        Uabal_V,
+        Uabd_V,
+        Uabdl_V,
+        Uadalp_V,
+        Uaddl_V,
+        Uaddlp_V,
+        Uaddlv_V,
+        Uaddw_V,
+        Ucvtf_Gp,
+        Ucvtf_Gp_Fixed,
+        Ucvtf_S,
+        Ucvtf_V,
+        Ucvtf_V_Fixed,
+        Uhadd_V,
+        Uhsub_V,
+        Umax_V,
+        Umaxp_V,
+        Umaxv_V,
+        Umin_V,
+        Uminp_V,
+        Uminv_V,
+        Umlal_V,
+        Umlal_Ve,
+        Umlsl_V,
+        Umlsl_Ve,
+        Umov_S,
+        Umull_V,
+        Umull_Ve,
+        Uqadd_S,
+        Uqadd_V,
+        Uqrshl_V,
+        Uqrshrn_S,
+        Uqrshrn_V,
+        Uqshl_V,
+        Uqshrn_S,
+        Uqshrn_V,
+        Uqsub_S,
+        Uqsub_V,
+        Uqxtn_S,
+        Uqxtn_V,
+        Urhadd_V,
+        Urshl_V,
+        Urshr_S,
+        Urshr_V,
+        Ursra_S,
+        Ursra_V,
+        Ushl_V,
+        Ushll_V,
+        Ushr_S,
+        Ushr_V,
+        Usqadd_S,
+        Usqadd_V,
+        Usra_S,
+        Usra_V,
+        Usubl_V,
+        Usubw_V,
+        Uzp1_V,
+        Uzp2_V,
+        Xtn_V,
+        Zip1_V,
+        Zip2_V,
+
+        // Base (AArch32) instructions.
+        Blx,
+        Bx,
+        Cmp,
+        Ldm,
+        Ldrb,
+        Ldrd,
+        Ldrh,
+        Ldrsb,
+        Ldrsh,
+        Mov,
+        Stm,
+        Strb,
+        Strd,
+        Strh
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/NativeInterface.cs b/ARMeilleure/Instructions/NativeInterface.cs
new file mode 100644
index 000000000..3a1e91c8e
--- /dev/null
+++ b/ARMeilleure/Instructions/NativeInterface.cs
@@ -0,0 +1,367 @@
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+    static class NativeInterface
+    {
+        private const int ErgSizeLog2 = 4; // Exclusive addresses are masked to 4 << ErgSizeLog2 (= 64) byte granules.
+
+        // Per-thread state: the execution context, its memory manager and the exclusive-access record.
+        private class ThreadContext
+        {
+            public ExecutionContext Context { get; }
+            public MemoryManager Memory { get; }
+
+            // ulong.MaxValue is the same value ClearExclusive() stores to drop a recorded exclusive access.
+            public ulong ExclusiveAddress { get; set; } = ulong.MaxValue;
+            public ulong ExclusiveValueLow { get; set; }
+            public ulong ExclusiveValueHigh { get; set; }
+
+            public ThreadContext(ExecutionContext context, MemoryManager memory)
+            {
+                Memory = memory;
+                Context = context;
+            }
+        }
+
+        [ThreadStatic] // One value per thread: set by RegisterThread, reset to null by UnregisterThread.
+        private static ThreadContext _context;
+
+        // Binds a new ThreadContext to the calling thread. The other members of this class
+        // dereference that per-thread context, so this has to run first on each thread.
+        public static void RegisterThread(ExecutionContext context, MemoryManager memory)
+            => _context = new ThreadContext(context, memory);
+
+        // Drops the calling thread's ThreadContext by resetting the
+        // thread-static reference to null.
+        public static void UnregisterThread()
+            => _context = null;
+
+        // Forwards (address, imm) to ExecutionContext.OnBreak, with the Statistics timer paused meanwhile.
+        public static void Break(ulong address, int imm)
+        {
+            Statistics.PauseTimer();
+            ExecutionContext current = GetContext();
+            current.OnBreak(address, imm);
+            Statistics.ResumeTimer();
+        }
+
+        // Forwards (address, imm) to ExecutionContext.OnSupervisorCall, with the Statistics timer paused meanwhile.
+        public static void SupervisorCall(ulong address, int imm)
+        {
+            Statistics.PauseTimer();
+            ExecutionContext current = GetContext();
+            current.OnSupervisorCall(address, imm);
+            Statistics.ResumeTimer();
+        }
+
+        // Forwards (address, opCode) to ExecutionContext.OnUndefined, with the Statistics timer paused meanwhile.
+        public static void Undefined(ulong address, int opCode)
+        {
+            Statistics.PauseTimer();
+            ExecutionContext current = GetContext();
+            current.OnUndefined(address, opCode);
+            Statistics.ResumeTimer();
+        }
+
+#region "System registers"
+        // Returns the CtrEl0 value of the calling thread's execution context,
+        // cast to ulong. Used as a getter by the Mrs emitter.
+        public static ulong GetCtrEl0()
+            => (ulong)GetContext().CtrEl0;
+
+        // Returns the DczidEl0 value of the calling thread's execution context,
+        // cast to ulong. Used as a getter by the Mrs emitter.
+        public static ulong GetDczidEl0()
+            => (ulong)GetContext().DczidEl0;
+
+        // Returns the Fpcr value of the calling thread's execution context,
+        // cast to ulong. Used as a getter by the Mrs emitter.
+        public static ulong GetFpcr()
+            => (ulong)GetContext().Fpcr;
+
+        // Returns the Fpsr value of the calling thread's execution context,
+        // cast to ulong. Used as a getter by the Mrs emitter.
+        public static ulong GetFpsr()
+            => (ulong)GetContext().Fpsr;
+
+        // Returns the TpidrEl0 value of the calling thread's execution context,
+        // cast to ulong. Used as a getter by the Mrs emitter.
+        public static ulong GetTpidrEl0()
+            => (ulong)GetContext().TpidrEl0;
+
+        // Returns the Tpidr value of the calling thread's execution context,
+        // cast to ulong. Used as a getter by the Mrs emitter.
+        public static ulong GetTpidr()
+            => (ulong)GetContext().Tpidr;
+
+        // Returns the CntfrqEl0 value of the calling thread's execution context.
+        // Used as a getter by the Mrs emitter.
+        public static ulong GetCntfrqEl0()
+            => GetContext().CntfrqEl0;
+
+        // Returns the CntpctEl0 value of the calling thread's execution context.
+        // Used as a getter by the Mrs emitter.
+        public static ulong GetCntpctEl0()
+            => GetContext().CntpctEl0;
+
+        // Stores the value, cast to FPCR, into the calling thread's execution
+        // context. Used as a setter by the Msr emitter.
+        public static void SetFpcr(ulong value)
+            => GetContext().Fpcr = (FPCR)value;
+
+        // Stores the value, cast to FPSR, into the calling thread's execution
+        // context. Used as a setter by the Msr emitter.
+        public static void SetFpsr(ulong value)
+            => GetContext().Fpsr = (FPSR)value;
+
+        // Stores the value, cast to long, as TpidrEl0 of the calling thread's
+        // execution context. Used as a setter by the Msr emitter.
+        public static void SetTpidrEl0(ulong value)
+            => GetContext().TpidrEl0 = (long)value;
+#endregion
+
+#region "Read"
+        // Reads one byte through the calling thread's memory manager;
+        // the address is passed on as a long.
+        public static byte ReadByte(ulong address)
+            => GetMemoryManager().ReadByte((long)address);
+
+        // Reads a ushort through the calling thread's memory manager;
+        // the address is passed on as a long.
+        public static ushort ReadUInt16(ulong address)
+            => GetMemoryManager().ReadUInt16((long)address);
+
+        // Reads a uint through the calling thread's memory manager;
+        // the address is passed on as a long.
+        public static uint ReadUInt32(ulong address)
+            => GetMemoryManager().ReadUInt32((long)address);
+
+        // Reads a ulong through the calling thread's memory manager;
+        // the address is passed on as a long.
+        public static ulong ReadUInt64(ulong address)
+            => GetMemoryManager().ReadUInt64((long)address);
+
+        // Reads a V128 through the calling thread's memory manager;
+        // the address is passed on as a long.
+        public static V128 ReadVector128(ulong address)
+            => GetMemoryManager().ReadVector128((long)address);
+#endregion
+
+#region "Read exclusive"
+        // Exclusive byte load: reads memory, then records the masked address and the value that was read.
+        public static byte ReadByteExclusive(ulong address)
+        {
+            ThreadContext monitor = _context;
+            byte loaded = monitor.Memory.ReadByte((long)address);
+            monitor.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+            monitor.ExclusiveValueLow = loaded;
+            monitor.ExclusiveValueHigh = 0;
+            return loaded;
+        }
+
+        // Exclusive 16-bit load: reads memory, then records the masked address and the value that was read.
+        public static ushort ReadUInt16Exclusive(ulong address)
+        {
+            ThreadContext monitor = _context;
+            ushort loaded = monitor.Memory.ReadUInt16((long)address);
+            monitor.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+            monitor.ExclusiveValueLow = loaded;
+            monitor.ExclusiveValueHigh = 0;
+            return loaded;
+        }
+
+        // Exclusive 32-bit load: reads memory, then records the masked address and the value that was read.
+        public static uint ReadUInt32Exclusive(ulong address)
+        {
+            ThreadContext monitor = _context;
+            uint loaded = monitor.Memory.ReadUInt32((long)address);
+            monitor.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+            monitor.ExclusiveValueLow = loaded;
+            monitor.ExclusiveValueHigh = 0;
+            return loaded;
+        }
+
+        // Exclusive 64-bit load: reads memory, then records the masked address and the value that was read.
+        public static ulong ReadUInt64Exclusive(ulong address)
+        {
+            ThreadContext monitor = _context;
+            ulong loaded = monitor.Memory.ReadUInt64((long)address);
+            monitor.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+            monitor.ExclusiveValueLow = loaded;
+            monitor.ExclusiveValueHigh = 0;
+            return loaded;
+        }
+
+        // Exclusive 128-bit load via AtomicLoadInt128; records the masked address and both 64-bit halves.
+        public static V128 ReadVector128Exclusive(ulong address)
+        {
+            ThreadContext monitor = _context;
+            V128 loaded = monitor.Memory.AtomicLoadInt128((long)address);
+            monitor.ExclusiveAddress = GetMaskedExclusiveAddress(address);
+            monitor.ExclusiveValueLow = loaded.GetUInt64(0);
+            monitor.ExclusiveValueHigh = loaded.GetUInt64(1);
+            return loaded;
+        }
+#endregion
+
+#region "Write"
+        // Writes one byte through the calling thread's memory manager;
+        // the address is passed on as a long.
+        public static void WriteByte(ulong address, byte value)
+            => GetMemoryManager().WriteByte((long)address, value);
+
+        // Writes a ushort through the calling thread's memory manager;
+        // the address is passed on as a long.
+        public static void WriteUInt16(ulong address, ushort value)
+            => GetMemoryManager().WriteUInt16((long)address, value);
+
+        // Writes a uint through the calling thread's memory manager;
+        // the address is passed on as a long.
+        public static void WriteUInt32(ulong address, uint value)
+            => GetMemoryManager().WriteUInt32((long)address, value);
+
+        // Writes a ulong through the calling thread's memory manager;
+        // the address is passed on as a long.
+        public static void WriteUInt64(ulong address, ulong value)
+            => GetMemoryManager().WriteUInt64((long)address, value);
+
+        // Writes a V128 through the calling thread's memory manager;
+        // the address is passed on as a long.
+        public static void WriteVector128(ulong address, V128 value)
+            => GetMemoryManager().WriteVector128((long)address, value);
+#endregion
+
+#region "Write exclusive"
+        // Exclusive byte store. Returns 0 when the store was performed and 1 when it was not.
+        public static int WriteByteExclusive(ulong address, byte value)
+        {
+            // The store is refused unless the recorded exclusive address matches this address once masked.
+            if (_context.ExclusiveAddress != GetMaskedExclusiveAddress(address))
+            {
+                return 1;
+            }
+
+            // Compare-exchange against the value captured by the exclusive read.
+            byte expected = (byte)_context.ExclusiveValueLow;
+            if (!_context.Memory.AtomicCompareExchangeByte((long)address, expected, (byte)value))
+            {
+                return 1;
+            }
+
+            ClearExclusive();
+            return 0;
+        }
+
+        // Exclusive 16-bit store. Returns 0 when the store was performed and 1 when it was not.
+        public static int WriteUInt16Exclusive(ulong address, ushort value)
+        {
+            // The store is refused unless the recorded exclusive address matches this address once masked.
+            if (_context.ExclusiveAddress != GetMaskedExclusiveAddress(address))
+            {
+                return 1;
+            }
+
+            // Compare-exchange against the value captured by the exclusive read.
+            short expected = (short)_context.ExclusiveValueLow;
+            if (!_context.Memory.AtomicCompareExchangeInt16((long)address, expected, (short)value))
+            {
+                return 1;
+            }
+
+            ClearExclusive();
+            return 0;
+        }
+
+        // Exclusive 32-bit store. Returns 0 when the store was performed and 1 when it was not.
+        public static int WriteUInt32Exclusive(ulong address, uint value)
+        {
+            // The store is refused unless the recorded exclusive address matches this address once masked.
+            if (_context.ExclusiveAddress != GetMaskedExclusiveAddress(address))
+            {
+                return 1;
+            }
+
+            // Compare-exchange against the value captured by the exclusive read.
+            int expected = (int)_context.ExclusiveValueLow;
+            if (!_context.Memory.AtomicCompareExchangeInt32((long)address, expected, (int)value))
+            {
+                return 1;
+            }
+
+            ClearExclusive();
+            return 0;
+        }
+
+        // Exclusive 64-bit store. Returns 0 when the store was performed and 1 when it was not.
+        public static int WriteUInt64Exclusive(ulong address, ulong value)
+        {
+            // The store is refused unless the recorded exclusive address matches this address once masked.
+            if (_context.ExclusiveAddress != GetMaskedExclusiveAddress(address))
+            {
+                return 1;
+            }
+
+            // Compare-exchange against the value captured by the exclusive read.
+            long expected = (long)_context.ExclusiveValueLow;
+            if (!_context.Memory.AtomicCompareExchangeInt64((long)address, expected, (long)value))
+            {
+                return 1;
+            }
+
+            ClearExclusive();
+            return 0;
+        }
+
+        // Exclusive 128-bit store. Returns 0 when the store was performed and 1 when it was not.
+        public static int WriteVector128Exclusive(ulong address, V128 value)
+        {
+            if (_context.ExclusiveAddress != GetMaskedExclusiveAddress(address))
+            {
+                return 1;
+            }
+
+            // Compare-exchange against both 64-bit halves captured by the exclusive read.
+            V128 expected = new V128(_context.ExclusiveValueLow, _context.ExclusiveValueHigh);
+            if (!_context.Memory.AtomicCompareExchangeInt128((long)address, expected, value))
+            {
+                return 1;
+            }
+
+            ClearExclusive();
+            return 0;
+        }
+#endregion
+
+        // Clears the low address bits, so all addresses inside the same
+        // (4 << ErgSizeLog2)-byte granule produce the same result.
+        private static ulong GetMaskedExclusiveAddress(ulong address)
+            => address & ~((4UL << ErgSizeLog2) - 1);
+
+        // Resets the recorded exclusive address to ulong.MaxValue, a value the
+        // masked addresses compared by the exclusive stores never take.
+        public static void ClearExclusive()
+            => _context.ExclusiveAddress = ulong.MaxValue;
+
+        // Calls ExecutionContext.CheckInterrupt for the calling thread, with the Statistics timer paused meanwhile.
+        public static void CheckSynchronization()
+        {
+            Statistics.PauseTimer();
+            ExecutionContext current = GetContext();
+            current.CheckInterrupt();
+            Statistics.ResumeTimer();
+        }
+
+        // Execution context registered for the calling thread.
+        // Dereferences the thread-static context, so RegisterThread must have run on this thread.
+        public static ExecutionContext GetContext()
+            => _context.Context;
+
+        // Memory manager registered for the calling thread.
+        // Dereferences the thread-static context, so RegisterThread must have run on this thread.
+        public static MemoryManager GetMemoryManager()
+            => _context.Memory;
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Instructions/SoftFallback.cs b/ARMeilleure/Instructions/SoftFallback.cs
new file mode 100644
index 000000000..dc0309218
--- /dev/null
+++ b/ARMeilleure/Instructions/SoftFallback.cs
@@ -0,0 +1,1307 @@
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Instructions
+{
+    static class SoftFallback
+    {
+#region "ShlReg"
+        // Shifts a signed element by a register-supplied amount. The element width is
+        // 8 << size bits. A negative amount shifts right (optionally rounding), and a
+        // left shift by the element width or more yields zero.
+        public static long SignedShlReg(long value, long shift, bool round, int size)
+        {
+            int elemBits = 8 << size;
+
+            // Only the low byte of the shift operand is used, read as a signed amount.
+            int amount = (sbyte)shift;
+
+            if (amount == 0)
+            {
+                return value;
+            }
+
+            if (amount < 0)
+            {
+                return SignedShrReg(value, -amount, round, elemBits);
+            }
+
+            return amount >= elemBits ? 0L : value << amount;
+        }
+
+        // Shifts an unsigned element by a register-supplied amount. The element width
+        // is 8 << size bits. A negative amount shifts right (optionally rounding), and
+        // a left shift by the element width or more yields zero.
+        public static ulong UnsignedShlReg(ulong value, ulong shift, bool round, int size)
+        {
+            int elemBits = 8 << size;
+
+            // Only the low byte of the shift operand is used, read as a signed amount.
+            int amount = (sbyte)shift;
+
+            if (amount == 0)
+            {
+                return value;
+            }
+
+            if (amount < 0)
+            {
+                return UnsignedShrReg(value, -amount, round, elemBits);
+            }
+
+            return amount >= elemBits ? 0UL : value << amount;
+        }
+
+        // Saturating variant of the signed register shift. Left shifts that do not
+        // fit the element (8 << size bits) saturate through the helper routines,
+        // which also set the Qc flag; negative amounts shift right.
+        public static long SignedShlRegSatQ(long value, long shift, bool round, int size)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            int elemBits = 8 << size;
+
+            // Only the low byte of the shift operand is used, read as a signed amount.
+            int amount = (sbyte)shift;
+
+            if (amount == 0)
+            {
+                return value;
+            }
+
+            if (amount < 0)
+            {
+                return SignedShrReg(value, -amount, round, elemBits);
+            }
+
+            if (amount >= elemBits)
+            {
+                return SignedSignSatQ(value, elemBits, context);
+            }
+
+            if (elemBits != 64)
+            {
+                // Narrower elements: shift in 64 bits, then clamp to the element range.
+                return SignedSrcSignedDstSatQ(value << amount, size);
+            }
+
+            // 64-bit elements: the shift overflowed if shifting back does not restore the input.
+            long shifted = value << amount;
+
+            bool lossless = (shifted >> amount) == value;
+
+            return lossless ? shifted : SignedSignSatQ(value, elemBits, context);
+        }
+
+        // Saturating variant of the unsigned register shift. Left shifts that do not
+        // fit the element (8 << size bits) saturate through the helper routines,
+        // which also set the Qc flag; negative amounts shift right.
+        public static ulong UnsignedShlRegSatQ(ulong value, ulong shift, bool round, int size)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            int elemBits = 8 << size;
+
+            // Only the low byte of the shift operand is used, read as a signed amount.
+            int amount = (sbyte)shift;
+
+            if (amount == 0)
+            {
+                return value;
+            }
+
+            if (amount < 0)
+            {
+                return UnsignedShrReg(value, -amount, round, elemBits);
+            }
+
+            if (amount >= elemBits)
+            {
+                return UnsignedSignSatQ(value, elemBits, context);
+            }
+
+            if (elemBits != 64)
+            {
+                // Narrower elements: shift in 64 bits, then clamp to the element range.
+                return UnsignedSrcUnsignedDstSatQ(value << amount, size);
+            }
+
+            // 64-bit elements: the shift overflowed if shifting back does not restore the input.
+            ulong shifted = value << amount;
+
+            bool lossless = (shifted >> amount) == value;
+
+            return lossless ? shifted : UnsignedSignSatQ(value, elemBits, context);
+        }
+
+        // Arithmetic right shift of a signed element, optionally rounding.
+        // shift := [1, 128]; eSize := {8, 16, 32, 64}.
+        private static long SignedShrReg(long value, int shift, bool round, int eSize)
+        {
+            if (!round)
+            {
+                if (shift >= eSize)
+                {
+                    // Every bit is shifted out; only the sign remains.
+                    return value < 0L ? -1L : 0L;
+                }
+
+                return value >> shift;
+            }
+
+            if (shift >= eSize)
+            {
+                return 0L;
+            }
+
+            long half = 1L << (shift - 1);
+
+            long biased = value + half;
+
+            // For 64-bit elements the addition can wrap from positive to negative;
+            // in that case the sum is shifted as an unsigned quantity.
+            bool wrapped = eSize == 64 && (~value & (value ^ biased)) < 0L;
+
+            return wrapped ? (long)((ulong)biased >> shift) : biased >> shift;
+        }
+
+        // Logical right shift of an unsigned element, optionally rounding.
+        // shift := [1, 128]; eSize := {8, 16, 32, 64}.
+        private static ulong UnsignedShrReg(ulong value, int shift, bool round, int eSize)
+        {
+            if (!round)
+            {
+                return shift >= eSize ? 0UL : value >> shift;
+            }
+
+            if (shift > 64)
+            {
+                return 0UL;
+            }
+
+            ulong half = 1UL << (shift - 1);
+
+            ulong sum = value + half;
+
+            // Only a 64-bit element can carry out of the 64-bit addition.
+            bool carried = eSize == 64 && sum < value && sum < half;
+
+            if (shift == 64)
+            {
+                // Shifting by 64 leaves just the carry bit.
+                return carried ? 1UL : 0UL;
+            }
+
+            ulong shifted = sum >> shift;
+
+            // Re-insert the carry at the position it lands on after the shift.
+            return carried ? shifted | (0x8000000000000000UL >> (shift - 1)) : shifted;
+        }
+
+        // Saturates according to the sign of op: positive gives the element maximum,
+        // negative gives the element minimum (both set Qc), zero stays zero.
+        // eSize := {8, 16, 32, 64}.
+        private static long SignedSignSatQ(long op, int eSize, ExecutionContext context)
+        {
+            long signBit = 1L << (eSize - 1);
+
+            long upper = signBit - 1L;
+            long lower = -signBit;
+
+            if (op == 0L)
+            {
+                return 0L;
+            }
+
+            context.Fpsr |= FPSR.Qc;
+
+            return op > 0L ? upper : lower;
+        }
+
+        // Saturates a non-zero op to the element maximum and sets Qc; zero stays zero.
+        // eSize := {8, 16, 32, 64}.
+        private static ulong UnsignedSignSatQ(ulong op, int eSize, ExecutionContext context)
+        {
+            ulong upper = ulong.MaxValue >> (64 - eSize);
+
+            if (op == 0UL)
+            {
+                return 0UL;
+            }
+
+            context.Fpsr |= FPSR.Qc;
+
+            return upper;
+        }
+#endregion
+
+#region "ShrImm64"
+        // Arithmetic right shift of a 64-bit value by an immediate. A zero roundConst
+        // means no rounding; otherwise roundConst is added before shifting (the
+        // original notes it is 1L << (shift - 1)). Shifts above 63 are handled
+        // explicitly.
+        public static long SignedShrImm64(long value, long roundConst, int shift)
+        {
+            bool rounding = roundConst != 0L;
+
+            if (shift > 63)
+            {
+                if (rounding)
+                {
+                    return 0L;
+                }
+
+                // Only the sign survives a full-width arithmetic shift.
+                return value < 0L ? -1L : 0L;
+            }
+
+            if (!rounding)
+            {
+                return value >> shift;
+            }
+
+            long biased = value + roundConst;
+
+            // If the addition wrapped from positive to negative, shift the sum as unsigned.
+            bool wrapped = (~value & (value ^ biased)) < 0L;
+
+            return wrapped ? (long)((ulong)biased >> shift) : biased >> shift;
+        }
+
+        // Logical right shift of a 64-bit value by an immediate. A zero roundConst
+        // means no rounding; otherwise roundConst is added before shifting (the
+        // original notes it is 1L << (shift - 1)). Shifts above 63 are handled
+        // explicitly.
+        public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift)
+        {
+            if (roundConst == 0L)
+            {
+                return shift <= 63 ? value >> shift : 0UL;
+            }
+
+            ulong bias = (ulong)roundConst;
+
+            ulong sum = value + bias;
+
+            bool carried = sum < value && sum < bias;
+
+            if (shift > 63)
+            {
+                // Shifting by 64 leaves just the carry bit.
+                return carried ? 1UL : 0UL;
+            }
+
+            ulong shifted = sum >> shift;
+
+            // Re-insert the carry at the position it lands on after the shift.
+            return carried ? shifted | (0x8000000000000000UL >> (shift - 1)) : shifted;
+        }
+#endregion
+
+#region "Rounding"
+        // Rounds a double to an integral value using the rounding mode read from the
+        // current context's FPCR.
+        public static double Round(double value)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            switch (context.Fpcr.GetRoundingMode())
+            {
+                case FPRoundingMode.ToNearest:
+                    return Math.Round(value); // even
+
+                case FPRoundingMode.TowardsPlusInfinity:
+                    return Math.Ceiling(value);
+
+                case FPRoundingMode.TowardsMinusInfinity:
+                    return Math.Floor(value);
+
+                default: /* FPRoundingMode.TowardsZero */
+                    return Math.Truncate(value);
+            }
+        }
+
+        // Rounds a float to an integral value using the rounding mode read from the
+        // current context's FPCR.
+        public static float RoundF(float value)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            switch (context.Fpcr.GetRoundingMode())
+            {
+                case FPRoundingMode.ToNearest:
+                    return MathF.Round(value); // even
+
+                case FPRoundingMode.TowardsPlusInfinity:
+                    return MathF.Ceiling(value);
+
+                case FPRoundingMode.TowardsMinusInfinity:
+                    return MathF.Floor(value);
+
+                default: /* FPRoundingMode.TowardsZero */
+                    return MathF.Truncate(value);
+            }
+        }
+#endregion
+
+#region "Saturation"
+        // Converts a float to int, clamping to [int.MinValue, int.MaxValue]; NaN gives 0.
+        public static int SatF32ToS32(float value)
+        {
+            if (float.IsNaN(value)) return 0;
+
+            if (value >= int.MaxValue) return int.MaxValue;
+            if (value <= int.MinValue) return int.MinValue;
+
+            return (int)value;
+        }
+
+        // Converts a float to long, clamping to [long.MinValue, long.MaxValue]; NaN gives 0.
+        public static long SatF32ToS64(float value)
+        {
+            if (float.IsNaN(value)) return 0;
+
+            if (value >= long.MaxValue) return long.MaxValue;
+            if (value <= long.MinValue) return long.MinValue;
+
+            return (long)value;
+        }
+
+        // Converts a float to uint, clamping to [uint.MinValue, uint.MaxValue]; NaN gives 0.
+        public static uint SatF32ToU32(float value)
+        {
+            if (float.IsNaN(value)) return 0;
+
+            if (value >= uint.MaxValue) return uint.MaxValue;
+            if (value <= uint.MinValue) return uint.MinValue;
+
+            return (uint)value;
+        }
+
+        // Converts a float to ulong, clamping to [ulong.MinValue, ulong.MaxValue]; NaN gives 0.
+        public static ulong SatF32ToU64(float value)
+        {
+            if (float.IsNaN(value)) return 0;
+
+            if (value >= ulong.MaxValue) return ulong.MaxValue;
+            if (value <= ulong.MinValue) return ulong.MinValue;
+
+            return (ulong)value;
+        }
+
+        // Converts a double to int, clamping to [int.MinValue, int.MaxValue]; NaN gives 0.
+        public static int SatF64ToS32(double value)
+        {
+            if (double.IsNaN(value)) return 0;
+
+            if (value >= int.MaxValue) return int.MaxValue;
+            if (value <= int.MinValue) return int.MinValue;
+
+            return (int)value;
+        }
+
+        // Converts a double to long, clamping to [long.MinValue, long.MaxValue]; NaN gives 0.
+        public static long SatF64ToS64(double value)
+        {
+            if (double.IsNaN(value)) return 0;
+
+            if (value >= long.MaxValue) return long.MaxValue;
+            if (value <= long.MinValue) return long.MinValue;
+
+            return (long)value;
+        }
+
+        // Converts a double to uint, clamping to [uint.MinValue, uint.MaxValue]; NaN gives 0.
+        public static uint SatF64ToU32(double value)
+        {
+            if (double.IsNaN(value)) return 0;
+
+            if (value >= uint.MaxValue) return uint.MaxValue;
+            if (value <= uint.MinValue) return uint.MinValue;
+
+            return (uint)value;
+        }
+
+        // Converts a double to ulong, clamping to [ulong.MinValue, ulong.MaxValue]; NaN gives 0.
+        public static ulong SatF64ToU64(double value)
+        {
+            if (double.IsNaN(value)) return 0;
+
+            if (value >= ulong.MaxValue) return ulong.MaxValue;
+            if (value <= ulong.MinValue) return ulong.MinValue;
+
+            return (ulong)value;
+        }
+#endregion
+
+#region "Saturating"
+        // Clamps a signed value to the signed range of an element of 8 << size bits.
+        // Qc is set when the value had to be clamped.
+        public static long SignedSrcSignedDstSatQ(long op, int size)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            int elemBits = 8 << size;
+
+            long signBit = 1L << (elemBits - 1);
+
+            long upper = signBit - 1L;
+            long lower = -signBit;
+
+            if (op >= lower && op <= upper)
+            {
+                return op;
+            }
+
+            context.Fpsr |= FPSR.Qc;
+
+            return op > upper ? upper : lower;
+        }
+
+        // Clamps a signed value to the unsigned range of an element of 8 << size bits.
+        // Negative values clamp to zero and values above the element maximum clamp to
+        // that maximum; Qc is set in both cases.
+        public static ulong SignedSrcUnsignedDstSatQ(long op, int size)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            int eSize = 8 << size;
+
+            // Built with a right shift so that a 64-bit element (size == 3) yields
+            // ulong.MaxValue; "1UL << 64" would wrap the shift count to 0 and give a
+            // maximum of 0.
+            ulong tMaxValue = ulong.MaxValue >> (64 - eSize);
+
+            if (op < 0L)
+            {
+                context.Fpsr |= FPSR.Qc;
+
+                return 0UL;
+            }
+
+            // op is non-negative here, so the comparison can be done unsigned without
+            // casting the maximum to long (which would be -1 for 64-bit elements).
+            ulong magnitude = (ulong)op;
+
+            if (magnitude > tMaxValue)
+            {
+                context.Fpsr |= FPSR.Qc;
+
+                return tMaxValue;
+            }
+
+            return magnitude;
+        }
+
+        // Clamps an unsigned value to the positive signed range of an element of
+        // 8 << size bits. Qc is set when the value had to be clamped.
+        public static long UnsignedSrcSignedDstSatQ(ulong op, int size)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            int elemBits = 8 << size;
+
+            long upper = (1L << (elemBits - 1)) - 1L;
+
+            if (op <= (ulong)upper)
+            {
+                return (long)op;
+            }
+
+            context.Fpsr |= FPSR.Qc;
+
+            return upper;
+        }
+
+        // Clamps an unsigned value to the unsigned range of an element of
+        // 8 << size bits. Qc is set when the value had to be clamped.
+        public static ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            int eSize = 8 << size;
+
+            // Built with a right shift so that a 64-bit element (size == 3) yields
+            // ulong.MaxValue; "1UL << 64" would wrap the shift count to 0 and give a
+            // maximum of 0, saturating every non-zero input.
+            ulong tMaxValue = ulong.MaxValue >> (64 - eSize);
+
+            if (op > tMaxValue)
+            {
+                context.Fpsr |= FPSR.Qc;
+
+                return tMaxValue;
+            }
+
+            return op;
+        }
+
+        // Passes op through unchanged, except that long.MinValue is replaced by
+        // long.MaxValue with Qc set.
+        public static long UnarySignedSatQAbsOrNeg(long op)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            if (op != long.MinValue)
+            {
+                return op;
+            }
+
+            context.Fpsr |= FPSR.Qc;
+
+            return long.MaxValue;
+        }
+
+        // Signed 64-bit saturating addition. On overflow the result is clamped
+        // towards the sign of op1 and Qc is set.
+        public static long BinarySignedSatQAdd(long op1, long op2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            long sum = op1 + op2;
+
+            // Overflow: both operands share a sign and the sum's sign differs from it.
+            bool overflowed = (~(op1 ^ op2) & (op1 ^ sum)) < 0L;
+
+            if (!overflowed)
+            {
+                return sum;
+            }
+
+            context.Fpsr |= FPSR.Qc;
+
+            return op1 < 0L ? long.MinValue : long.MaxValue;
+        }
+
+        // Unsigned 64-bit saturating addition. On carry-out the result is
+        // ulong.MaxValue and Qc is set.
+        public static ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            ulong sum = op1 + op2;
+
+            // No carry-out unless the wrapped sum is below both operands.
+            if (sum >= op1 || sum >= op2)
+            {
+                return sum;
+            }
+
+            context.Fpsr |= FPSR.Qc;
+
+            return ulong.MaxValue;
+        }
+
+        // Signed 64-bit saturating subtraction. On overflow the result is clamped
+        // towards the sign of op1 and Qc is set.
+        public static long BinarySignedSatQSub(long op1, long op2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            long difference = op1 - op2;
+
+            // Overflow: the operands differ in sign and the result's sign differs from op1's.
+            bool overflowed = ((op1 ^ op2) & (op1 ^ difference)) < 0L;
+
+            if (!overflowed)
+            {
+                return difference;
+            }
+
+            context.Fpsr |= FPSR.Qc;
+
+            return op1 < 0L ? long.MinValue : long.MaxValue;
+        }
+
+        // Unsigned 64-bit saturating subtraction. When op1 is below op2 the result
+        // is ulong.MinValue and Qc is set.
+        public static ulong BinaryUnsignedSatQSub(ulong op1, ulong op2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            if (op1 >= op2)
+            {
+                return op1 - op2;
+            }
+
+            context.Fpsr |= FPSR.Qc;
+
+            return ulong.MinValue;
+        }
+
+        // Adds an unsigned op1 to a signed op2 and returns a signed result. Sums
+        // above long.MaxValue saturate to long.MaxValue and set Qc.
+        public static long BinarySignedSatQAcc(ulong op1, long op2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            if (op1 > (ulong)long.MaxValue)
+            {
+                // op1 alone exceeds the signed range; only a negative op2 can bring
+                // the sum back into it.
+                if (op2 < 0L)
+                {
+                    ulong reduced = op1 + (ulong)op2;
+
+                    if (reduced <= (ulong)long.MaxValue)
+                    {
+                        return (long)reduced;
+                    }
+                }
+
+                context.Fpsr |= FPSR.Qc;
+
+                return long.MaxValue;
+            }
+
+            // op1 fits in a long here, so the sum can only overflow upwards, which
+            // shows as a negative sum with a non-negative op2.
+            long sum = (long)op1 + op2;
+
+            if ((~op2 & sum) >= 0L)
+            {
+                return sum;
+            }
+
+            context.Fpsr |= FPSR.Qc;
+
+            return long.MaxValue;
+        }
+
+        // Adds a signed op1 to an unsigned op2 and returns an unsigned result. Sums
+        // above ulong.MaxValue saturate to ulong.MaxValue and sums below zero
+        // saturate to ulong.MinValue; Qc is set in both cases.
+        public static ulong BinaryUnsignedSatQAcc(long op1, ulong op2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            if (op1 < 0L)
+            {
+                if (op2 > (ulong)long.MaxValue)
+                {
+                    // op2 is larger than any magnitude op1 can have; the wrapped
+                    // unsigned addition gives the exact sum.
+                    return (ulong)op1 + op2;
+                }
+
+                // Both operands fit in a long, so the signed sum cannot overflow.
+                long signedSum = op1 + (long)op2;
+
+                if (signedSum >= 0L)
+                {
+                    return (ulong)signedSum;
+                }
+
+                context.Fpsr |= FPSR.Qc;
+
+                return ulong.MinValue;
+            }
+
+            ulong widened = (ulong)op1;
+
+            ulong sum = widened + op2;
+
+            // No carry-out unless the wrapped sum is below both operands.
+            if (sum >= widened || sum >= op2)
+            {
+                return sum;
+            }
+
+            context.Fpsr |= FPSR.Qc;
+
+            return ulong.MaxValue;
+        }
+#endregion
+
+#region "Count"
+        // Counts how many bits directly below the top bit of a size-bit value are
+        // equal to that top bit. size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
+        public static ulong CountLeadingSigns(ulong value, int size)
+        {
+            // A set bit marks a position whose value differs from the bit above it.
+            ulong transitions = value ^ (value >> 1);
+
+            int top = size - 2;
+
+            for (int count = 0; count <= top; count++)
+            {
+                if (((transitions >> (top - count)) & 1UL) != 0UL)
+                {
+                    return (ulong)count;
+                }
+            }
+
+            return (ulong)(size - 1);
+        }
+
+        // Number of leading zero bits in each possible 4-bit value.
+        private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+        // Counts the leading zero bits of a size-bit value by scanning nibbles from
+        // the top. size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
+        public static ulong CountLeadingZeros(ulong value, int size)
+        {
+            if (value == 0ul)
+            {
+                return (ulong)size;
+            }
+
+            int total = 0;
+
+            for (int shift = size - 4; ; shift -= 4)
+            {
+                int zeros = ClzNibbleTbl[(int)(value >> shift) & 0b1111];
+
+                total += zeros;
+
+                // A nibble that is not entirely zero ends the scan.
+                if (zeros != 4)
+                {
+                    return (ulong)total;
+                }
+            }
+        }
+
+        // Counts the set bits of the low byte of value by summing adjacent bit
+        // groups of growing width. "size" is 8 (SIMD&FP Inst.).
+        public static ulong CountSetBits8(ulong value)
+        {
+            ulong pairSums = ((value >> 1) & 0x55ul) + (value & 0x55ul);
+
+            ulong nibbleSums = ((pairSums >> 2) & 0x33ul) + (pairSums & 0x33ul);
+
+            return (nibbleSums >> 4) + (nibbleSums & 0x0ful);
+        }
+#endregion
+
+#region "Table"
+        // Table lookup entry points, one per table register count (1 to 4) and
+        // result width. The V64 forms look up 8 index bytes, the V128 forms 16.
+        public static V128 Tbl1_V64(V128 vector, V128 tb0) => Tbl(vector, 8, tb0);
+
+        public static V128 Tbl1_V128(V128 vector, V128 tb0) => Tbl(vector, 16, tb0);
+
+        public static V128 Tbl2_V64(V128 vector, V128 tb0, V128 tb1) => Tbl(vector, 8, tb0, tb1);
+
+        public static V128 Tbl2_V128(V128 vector, V128 tb0, V128 tb1) => Tbl(vector, 16, tb0, tb1);
+
+        public static V128 Tbl3_V64(V128 vector, V128 tb0, V128 tb1, V128 tb2) => Tbl(vector, 8, tb0, tb1, tb2);
+
+        public static V128 Tbl3_V128(V128 vector, V128 tb0, V128 tb1, V128 tb2) => Tbl(vector, 16, tb0, tb1, tb2);
+
+        public static V128 Tbl4_V64(V128 vector, V128 tb0, V128 tb1, V128 tb2, V128 tb3) => Tbl(vector, 8, tb0, tb1, tb2, tb3);
+
+        public static V128 Tbl4_V128(V128 vector, V128 tb0, V128 tb1, V128 tb2, V128 tb3) => Tbl(vector, 16, tb0, tb1, tb2, tb3);
+
+        // Byte-wise table lookup. The table registers are concatenated into one byte
+        // table; each of the first "bytes" bytes of vector selects an entry from it.
+        // Selectors past the end of the table, and result bytes beyond "bytes",
+        // are left as zero.
+        private static V128 Tbl(V128 vector, int bytes, params V128[] tb)
+        {
+            byte[] lookup = new byte[tb.Length * 16];
+
+            for (int reg = 0; reg < tb.Length; reg++)
+            {
+                Buffer.BlockCopy(tb[reg].ToArray(), 0, lookup, reg * 16, 16);
+            }
+
+            byte[] selectors = vector.ToArray();
+
+            byte[] output = new byte[16];
+
+            for (int lane = 0; lane < bytes; lane++)
+            {
+                int selector = selectors[lane];
+
+                if (selector < lookup.Length)
+                {
+                    output[lane] = lookup[selector];
+                }
+            }
+
+            return new V128(output);
+        }
+#endregion
+
+#region "Crc32"
+        // Polynomial constants passed to the bitwise update routines below.
+        private const uint Crc32RevPoly  = 0xedb88320;
+        private const uint Crc32cRevPoly = 0x82f63b78;
+
+        // Entry points using Crc32RevPoly, one per operand width.
+        public static uint Crc32b(uint crc, byte value)
+        {
+            return Crc32(crc, Crc32RevPoly, value);
+        }
+
+        public static uint Crc32h(uint crc, ushort value)
+        {
+            return Crc32h(crc, Crc32RevPoly, value);
+        }
+
+        public static uint Crc32w(uint crc, uint value)
+        {
+            return Crc32w(crc, Crc32RevPoly, value);
+        }
+
+        public static uint Crc32x(uint crc, ulong value)
+        {
+            return Crc32x(crc, Crc32RevPoly, value);
+        }
+
+        // Entry points using Crc32cRevPoly, one per operand width.
+        public static uint Crc32cb(uint crc, byte value)
+        {
+            return Crc32(crc, Crc32cRevPoly, value);
+        }
+
+        public static uint Crc32ch(uint crc, ushort value)
+        {
+            return Crc32h(crc, Crc32cRevPoly, value);
+        }
+
+        public static uint Crc32cw(uint crc, uint value)
+        {
+            return Crc32w(crc, Crc32cRevPoly, value);
+        }
+
+        public static uint Crc32cx(uint crc, ulong value)
+        {
+            return Crc32x(crc, Crc32cRevPoly, value);
+        }
+
+        // Feeds the two bytes of val into the CRC, lowest byte first.
+        private static uint Crc32h(uint crc, uint poly, ushort val)
+        {
+            for (int shift = 0; shift < 16; shift += 8)
+            {
+                crc = Crc32(crc, poly, (byte)(val >> shift));
+            }
+
+            return crc;
+        }
+
+        // Feeds the four bytes of val into the CRC, lowest byte first.
+        private static uint Crc32w(uint crc, uint poly, uint val)
+        {
+            for (int shift = 0; shift < 32; shift += 8)
+            {
+                crc = Crc32(crc, poly, (byte)(val >> shift));
+            }
+
+            return crc;
+        }
+
+        // Feeds the eight bytes of val into the CRC, lowest byte first.
+        private static uint Crc32x(uint crc, uint poly, ulong val)
+        {
+            for (int shift = 0; shift < 64; shift += 8)
+            {
+                crc = Crc32(crc, poly, (byte)(val >> shift));
+            }
+
+            return crc;
+        }
+
+        // Bitwise CRC update for one input byte: the byte is XORed into the low bits
+        // of the state, then eight rounds shift the state right by one bit and XOR
+        // in poly whenever the bit shifted out was set.
+        private static uint Crc32(uint crc, uint poly, byte val)
+        {
+            uint state = crc ^ val;
+
+            for (int round = 0; round < 8; round++)
+            {
+                bool lowBitSet = (state & 1u) != 0u;
+
+                state >>= 1;
+
+                if (lowBitSet)
+                {
+                    state ^= poly;
+                }
+            }
+
+            return state;
+        }
+#endregion
+
+#region "Aes"
+        // XORs the round key into value, then applies the inverse row shift followed
+        // by the inverse byte substitution.
+        public static V128 Decrypt(V128 value, V128 roundKey)
+        {
+            V128 state = value ^ roundKey;
+
+            state = CryptoHelper.AesInvShiftRows(state);
+
+            return CryptoHelper.AesInvSubBytes(state);
+        }
+
+        // XORs the round key into value, then applies the row shift followed by the
+        // byte substitution.
+        public static V128 Encrypt(V128 value, V128 roundKey)
+        {
+            V128 state = value ^ roundKey;
+
+            state = CryptoHelper.AesShiftRows(state);
+
+            return CryptoHelper.AesSubBytes(state);
+        }
+
+        // Forwards to CryptoHelper.AesInvMixColumns.
+        public static V128 InverseMixColumns(V128 value) => CryptoHelper.AesInvMixColumns(value);
+
+        // Forwards to CryptoHelper.AesMixColumns.
+        public static V128 MixColumns(V128 value) => CryptoHelper.AesMixColumns(value);
+#endregion
+
+#region "Sha1"
+        // Runs four hash update rounds that mix elements 1..3 of hash_abcd with
+        // ShaChoose, and returns the updated hash_abcd.
+        public static V128 HashChoose(V128 hash_abcd, uint hash_e, V128 wk)
+        {
+            for (int round = 0; round < 4; round++)
+            {
+                uint a = hash_abcd.GetUInt32(0);
+                uint b = hash_abcd.GetUInt32(1);
+                uint c = hash_abcd.GetUInt32(2);
+                uint d = hash_abcd.GetUInt32(3);
+
+                hash_e += a.Rol(5) + ShaChoose(b, c, d) + wk.GetUInt32(round);
+
+                hash_abcd.Insert(1, b.Rol(30));
+
+                // Rotate the five 32-bit words (hash_e plus hash_abcd) by one word.
+                Rol32_160(ref hash_e, ref hash_abcd);
+            }
+
+            return hash_abcd;
+        }
+
+        // Rotates hash_e left by 30 bits.
+        public static uint FixedRotate(uint hash_e) => Rol(hash_e, 30);
+
+        // Runs four hash update rounds that mix elements 1..3 of hash_abcd with
+        // ShaMajority, and returns the updated hash_abcd.
+        public static V128 HashMajority(V128 hash_abcd, uint hash_e, V128 wk)
+        {
+            for (int round = 0; round < 4; round++)
+            {
+                uint a = hash_abcd.GetUInt32(0);
+                uint b = hash_abcd.GetUInt32(1);
+                uint c = hash_abcd.GetUInt32(2);
+                uint d = hash_abcd.GetUInt32(3);
+
+                hash_e += a.Rol(5) + ShaMajority(b, c, d) + wk.GetUInt32(round);
+
+                hash_abcd.Insert(1, b.Rol(30));
+
+                // Rotate the five 32-bit words (hash_e plus hash_abcd) by one word.
+                Rol32_160(ref hash_e, ref hash_abcd);
+            }
+
+            return hash_abcd;
+        }
+
+        // Runs four hash update rounds that mix elements 1..3 of hash_abcd with
+        // ShaParity, and returns the updated hash_abcd.
+        public static V128 HashParity(V128 hash_abcd, uint hash_e, V128 wk)
+        {
+            for (int round = 0; round < 4; round++)
+            {
+                uint a = hash_abcd.GetUInt32(0);
+                uint b = hash_abcd.GetUInt32(1);
+                uint c = hash_abcd.GetUInt32(2);
+                uint d = hash_abcd.GetUInt32(3);
+
+                hash_e += a.Rol(5) + ShaParity(b, c, d) + wk.GetUInt32(round);
+
+                hash_abcd.Insert(1, b.Rol(30));
+
+                // Rotate the five 32-bit words (hash_e plus hash_abcd) by one word.
+                Rol32_160(ref hash_e, ref hash_abcd);
+            }
+
+            return hash_abcd;
+        }
+
+        // Builds a vector from the upper 64-bit element of w0_3 and the lower 64-bit
+        // element of w4_7, then XORs it with (w0_3 ^ w8_11).
+        public static V128 Sha1SchedulePart1(V128 w0_3, V128 w4_7, V128 w8_11)
+        {
+            V128 spliced = new V128(w0_3.GetUInt64(1), w4_7.GetUInt64(0));
+
+            V128 mixed = w0_3 ^ w8_11;
+
+            return spliced ^ mixed;
+        }
+
+        // XORs tw0_3 with (w12_15 >> 32), rotates every 32-bit element left by one,
+        // and additionally XORs element 0 rotated left by two into element 3.
+        public static V128 Sha1SchedulePart2(V128 tw0_3, V128 w12_15)
+        {
+            V128 mixed = tw0_3 ^ (w12_15 >> 32);
+
+            uint first = mixed.GetUInt32(0);
+
+            uint r0 = Rol(first, 1);
+            uint r1 = Rol(mixed.GetUInt32(1), 1);
+            uint r2 = Rol(mixed.GetUInt32(2), 1);
+            uint r3 = Rol(mixed.GetUInt32(3), 1);
+
+            return new V128(r0, r1, r2, r3 ^ Rol(first, 2));
+        }
+
+        // Rotates the 160-bit quantity formed by y and x by 32 bits: x is shifted
+        // left by one element, y becomes element 0 of x, and the old element 3 of x
+        // becomes the new y.
+        private static void Rol32_160(ref uint y, ref V128 x)
+        {
+            uint carriedOut = x.GetUInt32(3);
+
+            x = x << 32;
+
+            x.Insert(0, y);
+
+            y = carriedOut;
+        }
+
+        // Per bit: takes the bit of y where x is set and the bit of z where x is clear.
+        private static uint ShaChoose(uint x, uint y, uint z) => (x & y) | (~x & z);
+
+        // Per bit: set when at least two of the three inputs have the bit set.
+        private static uint ShaMajority(uint x, uint y, uint z) => (x & y) | (x & z) | (y & z);
+
+        // Per bit: XOR of the three inputs.
+        private static uint ShaParity(uint x, uint y, uint z) => (x ^ y) ^ z;
+
+        /// <summary>
+        /// Rotates a 32-bit value left by <paramref name="count"/> bits.
+        /// </summary>
+        private static uint Rol(this uint value, int count)
+        {
+            uint shiftedUp   = value << count;
+            uint wrappedDown = value >> (32 - count);
+
+            return shiftedUp | wrappedDown;
+        }
+#endregion
+
+#region "Sha256"
+        /// <summary>
+        /// Runs <see cref="Sha256Hash"/> over both state halves and returns its "part 1" result.
+        /// </summary>
+        public static V128 HashLower(V128 hash_abcd, V128 hash_efgh, V128 wk)
+            => Sha256Hash(hash_abcd, hash_efgh, wk, part1: true);
+
+        /// <summary>
+        /// Runs <see cref="Sha256Hash"/> over both state halves and returns its "part 2" result.
+        /// The parameters arrive as (efgh, abcd) and are swapped back before the call.
+        /// </summary>
+        public static V128 HashUpper(V128 hash_efgh, V128 hash_abcd, V128 wk)
+            => Sha256Hash(hash_abcd, hash_efgh, wk, part1: false);
+
+        /// <summary>
+        /// For every 32-bit lane, applies ror(7) ^ ror(18) ^ lsr(3) to the following word (lanes 1..3 of
+        /// <paramref name="w0_3"/>, then lane 0 of <paramref name="w4_7"/>) and adds the lane of
+        /// <paramref name="w0_3"/> at the same index.
+        /// </summary>
+        /// <returns>The four sums packed into a vector.</returns>
+        public static V128 Sha256SchedulePart1(V128 w0_3, V128 w4_7)
+        {
+            V128 result = new V128();
+
+            for (int lane = 0; lane < 4; lane++)
+            {
+                // The "next" word wraps into the second vector for the last lane.
+                uint next = lane < 3 ? w0_3.GetUInt32(lane + 1) : w4_7.GetUInt32(0);
+
+                uint sigma = next.Ror(7) ^ next.Ror(18) ^ next.Lsr(3);
+
+                result.Insert(lane, sigma + w0_3.GetUInt32(lane));
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Produces four 32-bit lanes. Each lane is ror(17) ^ ror(19) ^ lsr(10) of a source word, plus the
+        /// matching lane of <paramref name="w0_3"/>, plus a word taken from <paramref name="w8_11"/> lanes
+        /// 1..3 and then <paramref name="w12_15"/> lane 0. Lanes 0 and 1 use the upper half of
+        /// <paramref name="w12_15"/> as source words; lanes 2 and 3 use the freshly computed lanes 0 and 1.
+        /// </summary>
+        /// <returns>The four computed lanes packed into a vector.</returns>
+        public static V128 Sha256SchedulePart2(V128 w0_3, V128 w8_11, V128 w12_15)
+        {
+            uint Sigma(uint word) => word.Ror(17) ^ word.Ror(19) ^ word.Lsr(10);
+
+            V128 result = new V128();
+
+            ulong source = w12_15.GetUInt64(1);
+
+            result.Insert(0, Sigma(source.ULongPart(0)) + w0_3.GetUInt32(0) + w8_11.GetUInt32(1));
+            result.Insert(1, Sigma(source.ULongPart(1)) + w0_3.GetUInt32(1) + w8_11.GetUInt32(2));
+
+            // The upper two lanes depend on the two lanes that were just produced.
+            source = result.GetUInt64(0);
+
+            result.Insert(2, Sigma(source.ULongPart(0)) + w0_3.GetUInt32(2) + w8_11.GetUInt32(3));
+            result.Insert(3, Sigma(source.ULongPart(1)) + w0_3.GetUInt32(3) + w12_15.GetUInt32(0));
+
+            return result;
+        }
+
+        /// <summary>
+        /// Runs four rounds over the state halves <paramref name="x"/> and <paramref name="y"/>, consuming
+        /// one 32-bit word of <paramref name="w"/> per round.
+        /// </summary>
+        /// <param name="part1">When true the final <paramref name="x"/> is returned, otherwise the final <paramref name="y"/>.</param>
+        private static V128 Sha256Hash(V128 x, V128 y, V128 w, bool part1)
+        {
+            for (int round = 0; round < 4; round++)
+            {
+                uint y0 = y.GetUInt32(0);
+
+                uint choose   = ShaChoose(y0, y.GetUInt32(1), y.GetUInt32(2));
+                uint majority = ShaMajority(x.GetUInt32(0), x.GetUInt32(1), x.GetUInt32(2));
+
+                uint t = y.GetUInt32(3) + ShaHashSigma1(y0) + choose + w.GetUInt32(round);
+
+                x.Insert(3, t + x.GetUInt32(3));
+
+                // Lane 0 of x is read after lane 3 was replaced, exactly as in the sequence above.
+                y.Insert(3, t + ShaHashSigma0(x.GetUInt32(0)) + majority);
+
+                Rol32_256(ref y, ref x);
+            }
+
+            return part1 ? x : y;
+        }
+
+        /// <summary>
+        /// Rotates the 256-bit quantity formed by <paramref name="y"/> (upper half) and <paramref name="x"/>
+        /// (lower half) left by 32 bits: the top lane of each half becomes lane 0 of the other.
+        /// </summary>
+        private static void Rol32_256(ref V128 y, ref V128 x)
+        {
+            uint topOfY = y.GetUInt32(3);
+            uint topOfX = x.GetUInt32(3);
+
+            y = y << 32;
+            y.Insert(0, topOfX);
+
+            x = x << 32;
+            x.Insert(0, topOfY);
+        }
+
+        /// <summary>
+        /// XOR of the input rotated right by 2, 13 and 22 bits.
+        /// </summary>
+        private static uint ShaHashSigma0(uint x) => x.Ror(2) ^ x.Ror(13) ^ x.Ror(22);
+
+        /// <summary>
+        /// XOR of the input rotated right by 6, 11 and 25 bits.
+        /// </summary>
+        private static uint ShaHashSigma1(uint x) => x.Ror(6) ^ x.Ror(11) ^ x.Ror(25);
+
+        /// <summary>
+        /// Rotates a 32-bit value right by <paramref name="count"/> bits.
+        /// </summary>
+        private static uint Ror(this uint value, int count)
+        {
+            uint shiftedDown = value >> count;
+            uint wrappedUp   = value << (32 - count);
+
+            return shiftedDown | wrappedUp;
+        }
+
+        /// <summary>
+        /// Logical (zero-filling) right shift of a 32-bit value.
+        /// </summary>
+        private static uint Lsr(this uint value, int count) => value >> count;
+
+        /// <summary>
+        /// Extracts one 32-bit half of a 64-bit value.
+        /// </summary>
+        /// <param name="part">0 selects the low half; any other value selects the high half.</param>
+        private static uint ULongPart(this ulong value, int part)
+        {
+            if (part == 0)
+            {
+                return (uint)(value & 0xFFFFFFFFUL);
+            }
+
+            return (uint)(value >> 32);
+        }
+#endregion
+
+#region "Reverse"
+        /// <summary>
+        /// Reverses the order of the low 8 bits of <paramref name="value"/>. Bits above bit 7 are discarded
+        /// by the first masking step, so the result always fits in 8 bits.
+        /// </summary>
+        public static uint ReverseBits8(uint value)
+        {
+            // Swap adjacent bits, then adjacent bit pairs, then the two nibbles.
+            uint bitsSwapped  = ((value >> 1) & 0x55) | ((value & 0x55) << 1);
+            uint pairsSwapped = ((bitsSwapped >> 2) & 0x33) | ((bitsSwapped & 0x33) << 2);
+
+            return (pairsSwapped >> 4) | ((pairsSwapped & 0x0f) << 4);
+        }
+
+        /// <summary>
+        /// Reverses the order of all 32 bits of <paramref name="value"/>.
+        /// </summary>
+        public static uint ReverseBits32(uint value)
+        {
+            // Swap progressively larger groups: bits, pairs, nibbles, bytes, half-words.
+            uint v = value;
+
+            v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
+            v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
+            v = ((v >> 4) & 0x0f0f0f0f) | ((v & 0x0f0f0f0f) << 4);
+            v = ((v >> 8) & 0x00ff00ff) | ((v & 0x00ff00ff) << 8);
+
+            return (v << 16) | (v >> 16);
+        }
+
+        /// <summary>
+        /// Reverses the order of all 64 bits of <paramref name="value"/>.
+        /// </summary>
+        public static ulong ReverseBits64(ulong value)
+        {
+            // Swap progressively larger groups: bits, pairs, nibbles, bytes, half-words, words.
+            ulong v = value;
+
+            v = ((v >> 1)  & 0x5555555555555555) | ((v & 0x5555555555555555) << 1);
+            v = ((v >> 2)  & 0x3333333333333333) | ((v & 0x3333333333333333) << 2);
+            v = ((v >> 4)  & 0x0f0f0f0f0f0f0f0f) | ((v & 0x0f0f0f0f0f0f0f0f) << 4);
+            v = ((v >> 8)  & 0x00ff00ff00ff00ff) | ((v & 0x00ff00ff00ff00ff) << 8);
+            v = ((v >> 16) & 0x0000ffff0000ffff) | ((v & 0x0000ffff0000ffff) << 16);
+
+            return (v << 32) | (v >> 32);
+        }
+
+        /// <summary>
+        /// Swaps the two bytes inside each 16-bit half of a 32-bit value by widening to 64 bits,
+        /// delegating to <see cref="ReverseBytes16_64"/> and truncating the result.
+        /// </summary>
+        public static uint ReverseBytes16_32(uint value)
+        {
+            ulong widened = value;
+
+            return (uint)ReverseBytes16_64(widened);
+        }
+
+        /// <summary>
+        /// Swaps the two bytes inside each 16-bit group of a 64-bit value.
+        /// </summary>
+        public static ulong ReverseBytes16_64(ulong value)
+        {
+            return ReverseBytes(value, RevSize.Rev16);
+        }
+
+        /// <summary>
+        /// Reverses the byte order inside each 32-bit group of a 64-bit value.
+        /// </summary>
+        public static ulong ReverseBytes32_64(ulong value)
+        {
+            return ReverseBytes(value, RevSize.Rev32);
+        }
+
+        /// <summary>
+        /// Width of the group inside which <see cref="ReverseBytes"/> reverses the byte order.
+        /// </summary>
+        private enum RevSize
+        {
+            Rev16,
+            Rev32,
+            Rev64
+        }
+
+        /// <summary>
+        /// Reverses the byte order inside every 16-, 32- or 64-bit group of <paramref name="value"/>.
+        /// </summary>
+        /// <param name="value">The 64-bit value to transform.</param>
+        /// <param name="size">The group width to reverse within.</param>
+        /// <returns>The transformed value.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="size"/> is not a defined <see cref="RevSize"/> member.
+        /// </exception>
+        private static ulong ReverseBytes(ulong value, RevSize size)
+        {
+            // Reject unknown sizes before doing any work, and report the offending parameter by name.
+            // ArgumentOutOfRangeException derives from ArgumentException, so existing handlers still match.
+            if (size != RevSize.Rev16 && size != RevSize.Rev32 && size != RevSize.Rev64)
+            {
+                throw new ArgumentOutOfRangeException(nameof(size), size, "Unknown byte-reversal group size.");
+            }
+
+            // Stage 1: swap the bytes of every 16-bit group.
+            value = ((value & 0xff00ff00ff00ff00) >> 8) | ((value & 0x00ff00ff00ff00ff) << 8);
+
+            if (size == RevSize.Rev16)
+            {
+                return value;
+            }
+
+            // Stage 2: swap the 16-bit halves of every 32-bit group.
+            value = ((value & 0xffff0000ffff0000) >> 16) | ((value & 0x0000ffff0000ffff) << 16);
+
+            if (size == RevSize.Rev32)
+            {
+                return value;
+            }
+
+            // Stage 3: swap the two 32-bit halves.
+            return ((value & 0xffffffff00000000) >> 32) | ((value & 0x00000000ffffffff) << 32);
+        }
+#endregion
+    }
+}
diff --git a/ARMeilleure/Instructions/SoftFloat.cs b/ARMeilleure/Instructions/SoftFloat.cs
new file mode 100644
index 000000000..7358e6b2c
--- /dev/null
+++ b/ARMeilleure/Instructions/SoftFloat.cs
@@ -0,0 +1,2757 @@
+using ARMeilleure.State;
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.Instructions
+{
+    /// <summary>
+    /// Holds the lookup tables used for reciprocal and reciprocal-square-root estimates.
+    /// Both tables are computed once, when the type is initialized.
+    /// </summary>
+    static class SoftFloat
+    {
+        /// <summary>256 entries; entry i is the estimate for the operand 256 + i, stored minus 256.</summary>
+        internal static readonly byte[] RecipEstimateTable;
+
+        /// <summary>384 entries; entry i is the estimate for the operand 128 + i, stored minus 256.</summary>
+        internal static readonly byte[] RecipSqrtEstimateTable;
+
+        static SoftFloat()
+        {
+            RecipEstimateTable     = BuildRecipEstimateTable();
+            RecipSqrtEstimateTable = BuildRecipSqrtEstimateTable();
+        }
+
+        /// <summary>
+        /// Computes the reciprocal estimate for every operand in [256, 512).
+        /// </summary>
+        private static byte[] BuildRecipEstimateTable()
+        {
+            const int entryCount = 256;
+
+            byte[] table = new byte[entryCount];
+
+            for (int index = 0; index < entryCount; index++)
+            {
+                uint operand = 256u + (uint)index;
+
+                Debug.Assert(operand >= 256u && operand < 512u);
+
+                // Evaluate at the midpoint of the operand's interval (2 * operand + 1).
+                uint midpoint = (operand << 1) + 1u;
+
+                uint quotient = (1u << 19) / midpoint;
+
+                // Halve with rounding.
+                uint estimate = (quotient + 1u) >> 1;
+
+                Debug.Assert(estimate >= 256u && estimate < 512u);
+
+                table[index] = (byte)(estimate - 256u);
+            }
+
+            return table;
+        }
+
+        /// <summary>
+        /// Computes the reciprocal square root estimate for every operand in [128, 512).
+        /// </summary>
+        private static byte[] BuildRecipSqrtEstimateTable()
+        {
+            const int entryCount = 384;
+
+            byte[] table = new byte[entryCount];
+
+            for (int index = 0; index < entryCount; index++)
+            {
+                uint operand = 128u + (uint)index;
+
+                Debug.Assert(operand >= 128u && operand < 512u);
+
+                // Operands below 256 are scaled to 2 * operand + 1; the others have their low bit
+                // cleared first and are scaled to 2 * (operand + 1).
+                uint scaled = operand < 256u
+                    ? (operand << 1) + 1u
+                    : ((operand & ~1u) + 1u) << 1;
+
+                // Find the largest root whose square, times the scaled operand, stays below 2^28.
+                uint root = 512u;
+
+                while (scaled * (root + 1u) * (root + 1u) < (1u << 28))
+                {
+                    root++;
+                }
+
+                // Halve with rounding.
+                uint estimate = (root + 1u) >> 1;
+
+                Debug.Assert(estimate >= 256u && estimate < 512u);
+
+                table[index] = (byte)(estimate - 256u);
+            }
+
+            return table;
+        }
+    }
+
+    static class SoftFloat16_32
+    {
+        /// <summary>
+        /// Converts a half-precision bit pattern to a single-precision value, using the FPCR/FPSR of the
+        /// current execution context.
+        /// </summary>
+        /// <param name="valueBits">Raw 16-bit encoding of the source value.</param>
+        /// <returns>The converted single-precision value.</returns>
+        public static float FPConvert(ushort valueBits)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context);
+
+            if (type == FPType.SNaN || type == FPType.QNaN)
+            {
+                // With FPCR.Dn set every NaN becomes the default NaN; otherwise the payload is carried over.
+                float nan = (context.Fpcr & FPCR.Dn) != 0
+                    ? FPDefaultNaN()
+                    : FPConvertNaN(valueBits);
+
+                if (type == FPType.SNaN)
+                {
+                    FPProcessException(FPException.InvalidOp, context);
+                }
+
+                return nan;
+            }
+
+            if (type == FPType.Infinity)
+            {
+                return FPInfinity(sign);
+            }
+
+            if (type == FPType.Zero)
+            {
+                return FPZero(sign);
+            }
+
+            return FPRoundCv(real, context);
+        }
+
+        /// <summary>
+        /// Returns the NaN used as the default NaN result: <c>float.NaN</c> with its sign flipped.
+        /// </summary>
+        // NOTE(review): presumably negated so the sign bit ends up clear - confirm against the runtime's float.NaN encoding.
+        private static float FPDefaultNaN() => -float.NaN;
+
+        /// <summary>
+        /// Returns negative infinity when <paramref name="sign"/> is set, positive infinity otherwise.
+        /// </summary>
+        private static float FPInfinity(bool sign)
+        {
+            if (sign)
+            {
+                return float.NegativeInfinity;
+            }
+
+            return float.PositiveInfinity;
+        }
+
+        /// <summary>
+        /// Returns negative zero when <paramref name="sign"/> is set, positive zero otherwise.
+        /// </summary>
+        private static float FPZero(bool sign)
+        {
+            if (sign)
+            {
+                return -0f;
+            }
+
+            return +0f;
+        }
+
+        /// <summary>
+        /// Returns the finite value of largest magnitude carrying the requested sign.
+        /// </summary>
+        private static float FPMaxNormal(bool sign)
+        {
+            if (sign)
+            {
+                return float.MinValue;
+            }
+
+            return float.MaxValue;
+        }
+
+        /// <summary>
+        /// Decodes a half-precision bit pattern into its class, sign and a double holding its value.
+        /// </summary>
+        /// <param name="valueBits">Raw 16-bit encoding to decode.</param>
+        /// <param name="type">Receives the class of the value (zero, nonzero, infinity, quiet or signaling NaN).</param>
+        /// <param name="sign">Receives true when bit 15 of the encoding is set.</param>
+        /// <param name="context">Execution context whose FPCR.Ahp bit selects how exponent 0x1F is read.</param>
+        /// <returns>
+        /// The signed value; NaNs yield a signed zero and infinities a signed placeholder of 2^1000.
+        /// </returns>
+        private static double FPUnpackCv(
+            this ushort valueBits,
+            out FPType type,
+            out bool sign,
+            ExecutionContext context)
+        {
+            uint bits = valueBits;
+
+            sign = (bits & 0x8000u) != 0u;
+
+            uint exponentField = (bits >> 10) & 0x1Fu;
+            uint fractionField = bits & 0x03FFu;
+
+            double magnitude;
+
+            if (exponentField == 0u)
+            {
+                if (fractionField != 0u)
+                {
+                    type      = FPType.Nonzero; // Subnormal.
+                    magnitude = Math.Pow(2d, -14) * ((double)fractionField * Math.Pow(2d, -10));
+                }
+                else
+                {
+                    type      = FPType.Zero;
+                    magnitude = 0d;
+                }
+            }
+            else if (exponentField == 0x1Fu && (context.Fpcr & FPCR.Ahp) == 0)
+            {
+                // Exponent 0x1F only encodes infinities and NaNs while FPCR.Ahp is clear.
+                if (fractionField != 0u)
+                {
+                    // The top fraction bit distinguishes quiet from signaling NaNs.
+                    type      = (fractionField & 0x0200u) != 0u ? FPType.QNaN : FPType.SNaN;
+                    magnitude = 0d;
+                }
+                else
+                {
+                    type      = FPType.Infinity;
+                    magnitude = Math.Pow(2d, 1000);
+                }
+            }
+            else
+            {
+                type      = FPType.Nonzero; // Normal.
+                magnitude = Math.Pow(2d, (int)exponentField - 15) * (1d + (double)fractionField * Math.Pow(2d, -10));
+            }
+
+            return sign ? -magnitude : magnitude;
+        }
+
+        /// <summary>
+        /// Rounds <paramref name="real"/> to single precision according to the rounding mode in FPCR and
+        /// records underflow, overflow and inexact conditions through <see cref="FPProcessException"/>.
+        /// </summary>
+        /// <param name="real">
+        /// Value to round. Zero never leaves the first normalization loop and an infinity never leaves the
+        /// second, so callers must only pass finite nonzero values.
+        /// </param>
+        /// <param name="context">Execution context supplying FPCR and receiving FPSR updates.</param>
+        /// <returns>The rounded single-precision value.</returns>
+        private static float FPRoundCv(double real, ExecutionContext context)
+        {
+            const int minimumExp = -126;
+
+            // Exponent and fraction field widths of the destination format.
+            const int e = 8;
+            const int f = 23;
+
+            bool   sign;
+            double mantissa;
+
+            // Split the input into sign and magnitude.
+            if (real < 0d)
+            {
+                sign     = true;
+                mantissa = -real;
+            }
+            else
+            {
+                sign     = false;
+                mantissa = real;
+            }
+
+            int exponent = 0;
+
+            // Normalize the magnitude into [1, 2), tracking the power of two in exponent.
+            while (mantissa < 1d)
+            {
+                mantissa *= 2d;
+                exponent--;
+            }
+
+            while (mantissa >= 2d)
+            {
+                mantissa /= 2d;
+                exponent++;
+            }
+
+            // Flush-to-zero: a result below the normal range becomes a signed zero and only sets Ufc.
+            if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp)
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(sign);
+            }
+
+            // A biased exponent of zero means the result is denormal (or rounds up from one).
+            uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0);
+
+            if (biasedExp == 0u)
+            {
+                // Rescale the mantissa so it is expressed relative to the minimum exponent.
+                mantissa /= Math.Pow(2d, minimumExp - exponent);
+            }
+
+            // Integer part that fits the fraction field, and the remainder that was cut off (in [0, 1)).
+            uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f));
+            double error = mantissa * Math.Pow(2d, f) - (double)intMant;
+
+            // Underflow is reported for inexact denormals, or for any denormal when FPCR.Ufe is set.
+            if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0))
+            {
+                FPProcessException(FPException.Underflow, context);
+            }
+
+            bool overflowToInf;
+            bool roundUp;
+
+            // Decide the rounding direction, and whether an overflow saturates to infinity or to the
+            // largest finite value.
+            switch (context.Fpcr.GetRoundingMode())
+            {
+                default:
+                case FPRoundingMode.ToNearest:
+                    // Ties go to the even mantissa.
+                    roundUp       = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u));
+                    overflowToInf = true;
+                    break;
+
+                case FPRoundingMode.TowardsPlusInfinity:
+                    roundUp       = (error != 0d && !sign);
+                    overflowToInf = !sign;
+                    break;
+
+                case FPRoundingMode.TowardsMinusInfinity:
+                    roundUp       = (error != 0d && sign);
+                    overflowToInf = sign;
+                    break;
+
+                case FPRoundingMode.TowardsZero:
+                    roundUp       = false;
+                    overflowToInf = false;
+                    break;
+            }
+
+            if (roundUp)
+            {
+                intMant++;
+
+                // A denormal that rounded up to 2^f has become the smallest normal value.
+                if (intMant == 1u << f)
+                {
+                    biasedExp = 1u;
+                }
+
+                // The mantissa overflowed its field: carry into the exponent.
+                if (intMant == 1u << (f + 1))
+                {
+                    biasedExp++;
+                    intMant >>= 1;
+                }
+            }
+
+            float result;
+
+            if (biasedExp >= (1u << e) - 1u)
+            {
+                result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+                FPProcessException(FPException.Overflow, context);
+
+                // Force the inexact report below.
+                error = 1d;
+            }
+            else
+            {
+                // Pack sign, biased exponent and fraction into the 32-bit encoding.
+                result = BitConverter.Int32BitsToSingle(
+                    (int)((sign ? 1u : 0u) << 31 | (biasedExp & 0xFFu) << 23 | (intMant & 0x007FFFFFu)));
+            }
+
+            if (error != 0d)
+            {
+                FPProcessException(FPException.Inexact, context);
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Converts a half-precision NaN encoding to a quiet single-precision NaN. Bit 15 becomes the sign
+        /// and the low nine fraction bits move to the top of the fraction field below the quiet bit.
+        /// </summary>
+        private static float FPConvertNaN(ushort valueBits)
+        {
+            uint bits = valueBits;
+
+            uint signBit = (bits & 0x8000u) << 16;
+            uint payload = (bits & 0x01FFu) << 13;
+
+            return BitConverter.Int32BitsToSingle((int)(signBit | 0x7FC00000u | payload));
+        }
+
+        /// <summary>
+        /// Reports a floating-point exception. If the FPCR bit eight positions above the exception's own
+        /// bit number is set, trapping is requested, which is not implemented; otherwise the exception's
+        /// bit is accumulated into FPSR.
+        /// </summary>
+        /// <exception cref="NotImplementedException">The matching FPCR bit is set.</exception>
+        private static void FPProcessException(FPException exc, ExecutionContext context)
+        {
+            int statusBit = (int)exc;
+
+            FPCR trapEnable = (FPCR)(1 << (statusBit + 8));
+
+            if ((context.Fpcr & trapEnable) != 0)
+            {
+                throw new NotImplementedException("Floating-point trap handling.");
+            }
+
+            context.Fpsr |= (FPSR)(1 << statusBit);
+        }
+    }
+
+    static class SoftFloat32_16
+    {
+        /// <summary>
+        /// Converts a single-precision value to a half-precision bit pattern, using the FPCR/FPSR of the
+        /// current execution context. With FPCR.Ahp set, NaN inputs produce a signed zero, infinities
+        /// produce the all-ones magnitude, and both report an invalid operation.
+        /// </summary>
+        /// <param name="value">Source single-precision value.</param>
+        /// <returns>The 16-bit encoding of the converted value.</returns>
+        public static ushort FPConvert(float value)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            double real = value.FPUnpackCv(out FPType type, out bool sign, out uint valueBits, context);
+
+            bool altHp = (context.Fpcr & FPCR.Ahp) != 0;
+
+            if (type == FPType.SNaN || type == FPType.QNaN)
+            {
+                ushort nanBits;
+
+                if (altHp)
+                {
+                    nanBits = FPZero(sign);
+                }
+                else if ((context.Fpcr & FPCR.Dn) != 0)
+                {
+                    nanBits = FPDefaultNaN();
+                }
+                else
+                {
+                    nanBits = FPConvertNaN(valueBits);
+                }
+
+                if (type == FPType.SNaN || altHp)
+                {
+                    FPProcessException(FPException.InvalidOp, context);
+                }
+
+                return nanBits;
+            }
+
+            if (type == FPType.Infinity)
+            {
+                if (!altHp)
+                {
+                    return FPInfinity(sign);
+                }
+
+                // Saturate to the all-ones magnitude with the original sign.
+                ushort saturated = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);
+
+                FPProcessException(FPException.InvalidOp, context);
+
+                return saturated;
+            }
+
+            if (type == FPType.Zero)
+            {
+                return FPZero(sign);
+            }
+
+            return FPRoundCv(real, context);
+        }
+
+        /// <summary>
+        /// Returns the half-precision encoding 0x7E00, used as the default NaN result.
+        /// </summary>
+        private static ushort FPDefaultNaN() => (ushort)0x7E00u;
+
+        /// <summary>
+        /// Returns the half-precision infinity encoding: 0xFC00 when <paramref name="sign"/> is set,
+        /// 0x7C00 otherwise.
+        /// </summary>
+        private static ushort FPInfinity(bool sign)
+        {
+            if (sign)
+            {
+                return (ushort)0xFC00u;
+            }
+
+            return (ushort)0x7C00u;
+        }
+
+        /// <summary>
+        /// Returns the half-precision zero encoding: 0x8000 when <paramref name="sign"/> is set,
+        /// 0x0000 otherwise.
+        /// </summary>
+        private static ushort FPZero(bool sign)
+        {
+            if (sign)
+            {
+                return (ushort)0x8000u;
+            }
+
+            return (ushort)0x0000u;
+        }
+
+        /// <summary>
+        /// Returns the half-precision encoding of the largest finite magnitude: 0xFBFF when
+        /// <paramref name="sign"/> is set, 0x7BFF otherwise.
+        /// </summary>
+        private static ushort FPMaxNormal(bool sign)
+        {
+            if (sign)
+            {
+                return (ushort)0xFBFFu;
+            }
+
+            return (ushort)0x7BFFu;
+        }
+
+        /// <summary>
+        /// Decodes a single-precision value into its raw bits, class, sign and a double holding its value.
+        /// </summary>
+        /// <param name="value">Value to decode.</param>
+        /// <param name="type">Receives the class of the value (zero, nonzero, infinity, quiet or signaling NaN).</param>
+        /// <param name="sign">Receives true when bit 31 of the encoding is set.</param>
+        /// <param name="valueBits">Receives the raw 32-bit encoding of <paramref name="value"/>.</param>
+        /// <param name="context">
+        /// Execution context; with FPCR.Fz set, subnormal inputs are treated as zero and reported as
+        /// an input-denormal exception.
+        /// </param>
+        /// <returns>
+        /// The signed value; NaNs yield a signed zero and infinities a signed placeholder of 2^1000.
+        /// </returns>
+        private static double FPUnpackCv(
+            this float value,
+            out FPType type,
+            out bool sign,
+            out uint valueBits,
+            ExecutionContext context)
+        {
+            valueBits = (uint)BitConverter.SingleToInt32Bits(value);
+
+            sign = (valueBits & 0x80000000u) != 0u;
+
+            uint exponentField = (valueBits >> 23) & 0xFFu;
+            uint fractionField = valueBits & 0x007FFFFFu;
+
+            double magnitude;
+
+            if (exponentField == 0u)
+            {
+                if (fractionField != 0u && (context.Fpcr & FPCR.Fz) == 0)
+                {
+                    type      = FPType.Nonzero; // Subnormal.
+                    magnitude = Math.Pow(2d, -126) * ((double)fractionField * Math.Pow(2d, -23));
+                }
+                else
+                {
+                    type      = FPType.Zero;
+                    magnitude = 0d;
+
+                    // A nonzero fraction here means a subnormal was flushed to zero.
+                    if (fractionField != 0u)
+                    {
+                        FPProcessException(FPException.InputDenorm, context);
+                    }
+                }
+            }
+            else if (exponentField == 0xFFu)
+            {
+                if (fractionField != 0u)
+                {
+                    // The top fraction bit distinguishes quiet from signaling NaNs.
+                    type      = (fractionField & 0x00400000u) != 0u ? FPType.QNaN : FPType.SNaN;
+                    magnitude = 0d;
+                }
+                else
+                {
+                    type      = FPType.Infinity;
+                    magnitude = Math.Pow(2d, 1000);
+                }
+            }
+            else
+            {
+                type      = FPType.Nonzero; // Normal.
+                magnitude = Math.Pow(2d, (int)exponentField - 127) * (1d + (double)fractionField * Math.Pow(2d, -23));
+            }
+
+            return sign ? -magnitude : magnitude;
+        }
+
+        /// <summary>
+        /// Rounds <paramref name="real"/> to a half-precision encoding according to the rounding mode in
+        /// FPCR and records underflow, overflow, invalid-operation and inexact conditions through
+        /// <see cref="FPProcessException"/>. Out-of-range results are handled differently depending on
+        /// FPCR.Ahp.
+        /// </summary>
+        /// <param name="real">
+        /// Value to round. Zero never leaves the first normalization loop and an infinity never leaves the
+        /// second, so callers must only pass finite nonzero values.
+        /// </param>
+        /// <param name="context">Execution context supplying FPCR and receiving FPSR updates.</param>
+        /// <returns>The 16-bit encoding of the rounded value.</returns>
+        private static ushort FPRoundCv(double real, ExecutionContext context)
+        {
+            const int minimumExp = -14;
+
+            // Exponent and fraction field widths of the destination format.
+            const int e = 5;
+            const int f = 10;
+
+            bool   sign;
+            double mantissa;
+
+            // Split the input into sign and magnitude.
+            if (real < 0d)
+            {
+                sign     = true;
+                mantissa = -real;
+            }
+            else
+            {
+                sign     = false;
+                mantissa = real;
+            }
+
+            int exponent = 0;
+
+            // Normalize the magnitude into [1, 2), tracking the power of two in exponent.
+            while (mantissa < 1d)
+            {
+                mantissa *= 2d;
+                exponent--;
+            }
+
+            while (mantissa >= 2d)
+            {
+                mantissa /= 2d;
+                exponent++;
+            }
+
+            // A biased exponent of zero means the result is denormal (or rounds up from one).
+            uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0);
+
+            if (biasedExp == 0u)
+            {
+                // Rescale the mantissa so it is expressed relative to the minimum exponent.
+                mantissa /= Math.Pow(2d, minimumExp - exponent);
+            }
+
+            // Integer part that fits the fraction field, and the remainder that was cut off (in [0, 1)).
+            uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f));
+            double error = mantissa * Math.Pow(2d, f) - (double)intMant;
+
+            // Underflow is reported for inexact denormals, or for any denormal when FPCR.Ufe is set.
+            if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0))
+            {
+                FPProcessException(FPException.Underflow, context);
+            }
+
+            bool overflowToInf;
+            bool roundUp;
+
+            // Decide the rounding direction, and whether an overflow saturates to infinity or to the
+            // largest finite value.
+            switch (context.Fpcr.GetRoundingMode())
+            {
+                default:
+                case FPRoundingMode.ToNearest:
+                    // Ties go to the even mantissa.
+                    roundUp       = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u));
+                    overflowToInf = true;
+                    break;
+
+                case FPRoundingMode.TowardsPlusInfinity:
+                    roundUp       = (error != 0d && !sign);
+                    overflowToInf = !sign;
+                    break;
+
+                case FPRoundingMode.TowardsMinusInfinity:
+                    roundUp       = (error != 0d && sign);
+                    overflowToInf = sign;
+                    break;
+
+                case FPRoundingMode.TowardsZero:
+                    roundUp       = false;
+                    overflowToInf = false;
+                    break;
+            }
+
+            if (roundUp)
+            {
+                intMant++;
+
+                // A denormal that rounded up to 2^f has become the smallest normal value.
+                if (intMant == 1u << f)
+                {
+                    biasedExp = 1u;
+                }
+
+                // The mantissa overflowed its field: carry into the exponent.
+                if (intMant == 1u << (f + 1))
+                {
+                    biasedExp++;
+                    intMant >>= 1;
+                }
+            }
+
+            ushort resultBits;
+
+            if ((context.Fpcr & FPCR.Ahp) == 0)
+            {
+                // FPCR.Ahp clear: the all-ones exponent is not available for finite values.
+                if (biasedExp >= (1u << e) - 1u)
+                {
+                    resultBits = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+                    FPProcessException(FPException.Overflow, context);
+
+                    // Force the inexact report below.
+                    error = 1d;
+                }
+                else
+                {
+                    // Pack sign, biased exponent and fraction into the 16-bit encoding.
+                    resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu));
+                }
+            }
+            else
+            {
+                // FPCR.Ahp set: the all-ones exponent is usable, so only larger exponents are out of range.
+                if (biasedExp >= 1u << e)
+                {
+                    // Saturate to the all-ones magnitude and report an invalid operation instead of overflow.
+                    resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);
+
+                    FPProcessException(FPException.InvalidOp, context);
+
+                    // Suppress the inexact report below.
+                    error = 0d;
+                }
+                else
+                {
+                    resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu));
+                }
+            }
+
+            if (error != 0d)
+            {
+                FPProcessException(FPException.Inexact, context);
+            }
+
+            return resultBits;
+        }
+
+        /// <summary>
+        /// Converts a single-precision NaN encoding to a quiet half-precision NaN. Bit 31 becomes the sign
+        /// and fraction bits 21..13 become the low nine fraction bits below the quiet bit.
+        /// </summary>
+        private static ushort FPConvertNaN(uint valueBits)
+        {
+            uint signBit = (valueBits >> 16) & 0x8000u;
+            uint payload = (valueBits >> 13) & 0x01FFu;
+
+            return (ushort)(signBit | 0x7E00u | payload);
+        }
+
+        /// <summary>
+        /// Reports a floating-point exception. If the FPCR bit eight positions above the exception's own
+        /// bit number is set, trapping is requested, which is not implemented; otherwise the exception's
+        /// bit is accumulated into FPSR.
+        /// </summary>
+        /// <exception cref="NotImplementedException">The matching FPCR bit is set.</exception>
+        private static void FPProcessException(FPException exc, ExecutionContext context)
+        {
+            int statusBit = (int)exc;
+
+            FPCR trapEnable = (FPCR)(1 << (statusBit + 8));
+
+            if ((context.Fpcr & trapEnable) != 0)
+            {
+                throw new NotImplementedException("Floating-point trap handling.");
+            }
+
+            context.Fpsr |= (FPSR)(1 << statusBit);
+        }
+    }
+
+    static class SoftFloat32
+    {
+        /// <summary>
+        /// Adds two single-precision values. NaN operands are resolved by <c>FPProcessNaNs</c>; infinities
+        /// and same-signed zeros are handled explicitly; everything else uses the native addition, with a
+        /// subnormal result flushed to zero when FPCR.Fz is set.
+        /// </summary>
+        /// <returns>The sum, or the NaN/infinity/zero chosen by the special-case rules.</returns>
+        public static float FPAdd(float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+            float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return result;
+            }
+
+            bool inf1  = type1 == FPType.Infinity;
+            bool inf2  = type2 == FPType.Infinity;
+            bool zero1 = type1 == FPType.Zero;
+            bool zero2 = type2 == FPType.Zero;
+
+            if (inf1 && inf2 && sign1 != sign2)
+            {
+                // Infinities of opposite sign cancel: invalid operation.
+                result = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+
+                return result;
+            }
+
+            if ((inf1 && !sign1) || (inf2 && !sign2))
+            {
+                return FPInfinity(false);
+            }
+
+            if ((inf1 && sign1) || (inf2 && sign2))
+            {
+                return FPInfinity(true);
+            }
+
+            if (zero1 && zero2 && sign1 == sign2)
+            {
+                return FPZero(sign1);
+            }
+
+            result = value1 + value2;
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                result = FPZero(result < 0f);
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Compares two single-precision values and returns a 4-bit code: 0b0011 when either operand is a
+        /// NaN, 0b0110 when they are equal, 0b1000 when the first is smaller and 0b0010 otherwise.
+        /// </summary>
+        /// <param name="signalNaNs">When true, quiet NaN operands also report an invalid operation.</param>
+        // NOTE(review): the codes look like NZCV condition flags - confirm against the caller.
+        public static int FPCompare(float value1, float value2, bool signalNaNs)
+        {
+            const int unordered = 0b0011;
+            const int equal     = 0b0110;
+            const int less      = 0b1000;
+            const int greater   = 0b0010;
+
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+            value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+            bool anySNaN = type1 == FPType.SNaN || type2 == FPType.SNaN;
+            bool anyQNaN = type1 == FPType.QNaN || type2 == FPType.QNaN;
+
+            if (anySNaN || anyQNaN)
+            {
+                if (anySNaN || signalNaNs)
+                {
+                    FPProcessException(FPException.InvalidOp, context);
+                }
+
+                return unordered;
+            }
+
+            if (value1 == value2)
+            {
+                return equal;
+            }
+
+            return value1 < value2 ? less : greater;
+        }
+
+        /// <summary>
+        /// Equality test producing a mask via <c>ZerosOrOnes</c>. Any NaN operand makes the test false;
+        /// only signaling NaNs report an invalid operation.
+        /// </summary>
+        public static float FPCompareEQ(float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+            value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+            bool anySNaN = type1 == FPType.SNaN || type2 == FPType.SNaN;
+            bool anyQNaN = type1 == FPType.QNaN || type2 == FPType.QNaN;
+
+            if (!anySNaN && !anyQNaN)
+            {
+                return ZerosOrOnes(value1 == value2);
+            }
+
+            float result = ZerosOrOnes(false);
+
+            if (anySNaN)
+            {
+                FPProcessException(FPException.InvalidOp, context);
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Greater-than-or-equal test producing a mask via <c>ZerosOrOnes</c>. Any NaN operand, quiet or
+        /// signaling, makes the test false and reports an invalid operation.
+        /// </summary>
+        public static float FPCompareGE(float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+            value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+            bool nan1 = type1 == FPType.SNaN || type1 == FPType.QNaN;
+            bool nan2 = type2 == FPType.SNaN || type2 == FPType.QNaN;
+
+            if (!nan1 && !nan2)
+            {
+                return ZerosOrOnes(value1 >= value2);
+            }
+
+            float result = ZerosOrOnes(false);
+
+            FPProcessException(FPException.InvalidOp, context);
+
+            return result;
+        }
+
+        /// <summary>
+        /// Greater-than test producing a mask via <c>ZerosOrOnes</c>. Any NaN operand, quiet or signaling,
+        /// makes the test false and reports an invalid operation.
+        /// </summary>
+        public static float FPCompareGT(float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+            value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+            bool nan1 = type1 == FPType.SNaN || type1 == FPType.QNaN;
+            bool nan2 = type2 == FPType.SNaN || type2 == FPType.QNaN;
+
+            if (!nan1 && !nan2)
+            {
+                return ZerosOrOnes(value1 > value2);
+            }
+
+            float result = ZerosOrOnes(false);
+
+            FPProcessException(FPException.InvalidOp, context);
+
+            return result;
+        }
+
+        /// <summary>
+        /// Less-than-or-equal test, implemented as <see cref="FPCompareGE"/> with the operands swapped.
+        /// </summary>
+        public static float FPCompareLE(float value1, float value2) => FPCompareGE(value2, value1);
+
+        /// <summary>
+        /// Less-than test, implemented as <see cref="FPCompareGT"/> with the operands swapped.
+        /// </summary>
+        public static float FPCompareLT(float value1, float value2) => FPCompareGT(value2, value1);
+
+        /// <summary>
+        /// Divides two single-precision values. NaN operands are resolved by <c>FPProcessNaNs</c>;
+        /// inf/inf and 0/0 give the default NaN with an invalid operation; a finite value divided by zero
+        /// gives a signed infinity with a divide-by-zero exception; everything else uses the native
+        /// division, with a subnormal result flushed to zero when FPCR.Fz is set.
+        /// </summary>
+        /// <returns>The quotient, or the NaN/infinity/zero chosen by the special-case rules.</returns>
+        public static float FPDiv(float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+            float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return result;
+            }
+
+            bool inf1  = type1 == FPType.Infinity;
+            bool inf2  = type2 == FPType.Infinity;
+            bool zero1 = type1 == FPType.Zero;
+            bool zero2 = type2 == FPType.Zero;
+
+            if ((inf1 && inf2) || (zero1 && zero2))
+            {
+                result = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+
+                return result;
+            }
+
+            if (inf1 || zero2)
+            {
+                result = FPInfinity(sign1 ^ sign2);
+
+                // Only a finite dividend over zero counts as a division by zero.
+                if (!inf1)
+                {
+                    FPProcessException(FPException.DivideByZero, context);
+                }
+
+                return result;
+            }
+
+            if (zero1 || inf2)
+            {
+                return FPZero(sign1 ^ sign2);
+            }
+
+            result = value1 / value2;
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                result = FPZero(result < 0f);
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Returns the larger of two single-precision values. NaN operands are resolved by
+        /// <c>FPProcessNaNs</c>. When the selected operand is a zero, the result is negative only if both
+        /// operands are negative. A subnormal result is flushed to zero when FPCR.Fz is set, whichever
+        /// operand was selected.
+        /// </summary>
+        /// <returns>The selected operand, or the NaN/infinity/zero chosen by the special-case rules.</returns>
+        public static float FPMax(float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+            float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return result;
+            }
+
+            // Ties (including +0 against -0) select the second operand.
+            bool pickFirst = value1 > value2;
+
+            FPType type  = pickFirst ? type1  : type2;
+            bool   sign  = pickFirst ? sign1  : sign2;
+            float  value = pickFirst ? value1 : value2;
+
+            if (type == FPType.Infinity)
+            {
+                return FPInfinity(sign);
+            }
+
+            if (type == FPType.Zero)
+            {
+                return FPZero(sign1 && sign2);
+            }
+
+            result = value;
+
+            // Previously only the value2 path applied this flush; both paths now share it.
+            if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                result = FPZero(result < 0f);
+            }
+
+            return result;
+        }
+
+        // Variant of FPMax: when exactly one operand is a quiet NaN, that operand is replaced
+        // by negative infinity before delegating to FPMax.
+        public static float FPMaxNum(float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            // Only the classification is needed here; FPMax unpacks the operands again.
+            value1.FPUnpack(out FPType type1, out _, out _, context);
+            value2.FPUnpack(out FPType type2, out _, out _, context);
+
+            bool quiet1 = type1 == FPType.QNaN;
+            bool quiet2 = type2 == FPType.QNaN;
+
+            if (quiet1 != quiet2)
+            {
+                if (quiet1)
+                {
+                    value1 = FPInfinity(true);
+                }
+                else
+                {
+                    value2 = FPInfinity(true);
+                }
+            }
+
+            return FPMax(value1, value2);
+        }
+
+        // Returns the smaller of value1 and value2. NaN operands are resolved by FPProcessNaNs.
+        // When the selected operand is a zero, the result is negative if either operand
+        // carries a negative sign.
+        public static float FPMin(float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+            float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return nanResult;
+            }
+
+            if (value1 < value2)
+            {
+                switch (type1)
+                {
+                    case FPType.Infinity: return FPInfinity(sign1);
+                    case FPType.Zero:     return FPZero(sign1 || sign2);
+                    default:              return value1;
+                }
+            }
+
+            // value1 is not less than value2, so the second operand is selected.
+            switch (type2)
+            {
+                case FPType.Infinity: return FPInfinity(sign2);
+                case FPType.Zero:     return FPZero(sign1 || sign2);
+            }
+
+            float selected = value2;
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(selected))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                selected = FPZero(selected < 0f);
+            }
+
+            return selected;
+        }
+
+        // Variant of FPMin: when exactly one operand is a quiet NaN, that operand is replaced
+        // by positive infinity before delegating to FPMin.
+        public static float FPMinNum(float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            // Only the classification is needed here; FPMin unpacks the operands again.
+            value1.FPUnpack(out FPType type1, out _, out _, context);
+            value2.FPUnpack(out FPType type2, out _, out _, context);
+
+            bool quiet1 = type1 == FPType.QNaN;
+            bool quiet2 = type2 == FPType.QNaN;
+
+            if (quiet1 != quiet2)
+            {
+                if (quiet1)
+                {
+                    value1 = FPInfinity(false);
+                }
+                else
+                {
+                    value2 = FPInfinity(false);
+                }
+            }
+
+            return FPMin(value1, value2);
+        }
+
+        // Multiplies value1 by value2. Infinity times zero is an invalid operation; other
+        // infinity/zero operands produce a signed infinity/zero directly, and the remaining
+        // cases use the host multiplication followed by an optional flush-to-zero step.
+        public static float FPMul(float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+            float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return nanResult;
+            }
+
+            bool inf1  = type1 == FPType.Infinity;
+            bool inf2  = type2 == FPType.Infinity;
+            bool zero1 = type1 == FPType.Zero;
+            bool zero2 = type2 == FPType.Zero;
+
+            if ((inf1 && zero2) || (zero1 && inf2))
+            {
+                float defaultNaN = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+
+                return defaultNaN;
+            }
+
+            if (inf1 || inf2)
+            {
+                return FPInfinity(sign1 ^ sign2);
+            }
+
+            if (zero1 || zero2)
+            {
+                return FPZero(sign1 ^ sign2);
+            }
+
+            float product = value1 * value2;
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(product))
+            {
+                // Flush-to-zero: record it in FPSR and return a zero with the product's sign.
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(product < 0f);
+            }
+
+            return product;
+        }
+
+        // Computes valueA + (value1 * value2) with explicit handling of NaN, infinity and zero
+        // operands. The general case is evaluated as a separate host multiply and add (not fused).
+        public static float FPMulAdd(float valueA, float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out uint addend, context);
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1,    context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2,    context);
+
+            bool inf1  = type1 == FPType.Infinity;
+            bool inf2  = type2 == FPType.Infinity;
+            bool zero1 = type1 == FPType.Zero;
+            bool zero2 = type2 == FPType.Zero;
+
+            // Infinity times zero, in either operand order.
+            bool invalidProduct = (inf1 && zero2) || (zero1 && inf2);
+
+            float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context);
+
+            if (typeA == FPType.QNaN && invalidProduct)
+            {
+                // A quiet-NaN addend does not hide an invalid product: the default NaN is
+                // produced and InvalidOp is signalled.
+                result = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+            }
+
+            if (done)
+            {
+                return result;
+            }
+
+            bool infA  = typeA == FPType.Infinity;
+            bool zeroA = typeA == FPType.Zero;
+
+            // Sign and class of the (not yet computed) product.
+            bool signP = sign1 ^  sign2;
+            bool infP  = inf1  || inf2;
+            bool zeroP = zero1 || zero2;
+
+            if (invalidProduct || (infA && infP && signA != signP))
+            {
+                float defaultNaN = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+
+                return defaultNaN;
+            }
+
+            if ((infA && !signA) || (infP && !signP))
+            {
+                return FPInfinity(false);
+            }
+
+            if ((infA && signA) || (infP && signP))
+            {
+                return FPInfinity(true);
+            }
+
+            if (zeroA && zeroP && signA == signP)
+            {
+                return FPZero(signA);
+            }
+
+            // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T);
+            // https://github.com/dotnet/corefx/issues/31903
+
+            float sum = valueA + (value1 * value2);
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(sum))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(sum < 0f);
+            }
+
+            return sum;
+        }
+
+        // Computes valueA + ((-value1) * value2) by negating value1 and delegating to FPMulAdd.
+        public static float FPMulSub(float valueA, float value1, float value2)
+        {
+            // No context lookup is needed here: FPMulAdd obtains the execution context itself.
+            return FPMulAdd(valueA, value1.FPNeg(), value2);
+        }
+
+        // Multiplies value1 by value2 like FPMul, except that infinity times zero yields a
+        // signed 2.0 and raises no exception.
+        public static float FPMulX(float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+            float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return nanResult;
+            }
+
+            bool inf1  = type1 == FPType.Infinity;
+            bool inf2  = type2 == FPType.Infinity;
+            bool zero1 = type1 == FPType.Zero;
+            bool zero2 = type2 == FPType.Zero;
+
+            if ((inf1 && zero2) || (zero1 && inf2))
+            {
+                return FPTwo(sign1 ^ sign2);
+            }
+
+            if (inf1 || inf2)
+            {
+                return FPInfinity(sign1 ^ sign2);
+            }
+
+            if (zero1 || zero2)
+            {
+                return FPZero(sign1 ^ sign2);
+            }
+
+            float product = value1 * value2;
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(product))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(product < 0f);
+            }
+
+            return product;
+        }
+
+        // Computes a reciprocal estimate of value using SoftFloat.RecipEstimateTable.
+        // NaN, infinity, zero, very small inputs and (under flush-to-zero) very large inputs
+        // are handled explicitly before the table lookup.
+        public static float FPRecipEstimate(float value)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            // Only the classification outputs are used; the value returned by FPUnpack is
+            // discarded, so 'value' below is still the raw input.
+            value.FPUnpack(out FPType type, out bool sign, out uint op, context);
+
+            float result;
+
+            if (type == FPType.SNaN || type == FPType.QNaN)
+            {
+                result = FPProcessNaN(type, op, context);
+            }
+            else if (type == FPType.Infinity)
+            {
+                result = FPZero(sign);
+            }
+            else if (type == FPType.Zero)
+            {
+                result = FPInfinity(sign);
+
+                FPProcessException(FPException.DivideByZero, context);
+            }
+            else if (MathF.Abs(value) < MathF.Pow(2f, -128))
+            {
+                // Magnitude below 2^-128: Overflow and Inexact are signalled, and the rounding
+                // mode selects between a signed infinity and the signed largest normal value.
+                bool overflowToInf;
+
+                switch (context.Fpcr.GetRoundingMode())
+                {
+                    default:
+                    case FPRoundingMode.ToNearest:            overflowToInf = true;  break;
+                    case FPRoundingMode.TowardsPlusInfinity:  overflowToInf = !sign; break;
+                    case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign;  break;
+                    case FPRoundingMode.TowardsZero:          overflowToInf = false; break;
+                }
+
+                result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+                FPProcessException(FPException.Overflow, context);
+                FPProcessException(FPException.Inexact,  context);
+            }
+            else if ((context.Fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126)))
+            {
+                // Flush-to-zero is set and the magnitude is at least 2^126: return a signed
+                // zero and set the Ufc status bit.
+                result = FPZero(sign);
+
+                context.Fpsr |= FPSR.Ufc;
+            }
+            else
+            {
+                // The 23 stored fraction bits are moved up to bits 29..51; exp is the 8-bit
+                // exponent field.
+                ulong fraction = (ulong)(op & 0x007FFFFFu) << 29;
+                uint exp = (op & 0x7F800000u) >> 23;
+
+                if (exp == 0u)
+                {
+                    // Zero exponent field: shift the fraction up by one or two bits depending
+                    // on whether bit 51 is already set.
+                    if ((fraction & 0x0008000000000000ul) == 0ul)
+                    {
+                        fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
+                        // exp is unsigned, so this wraps below zero; the subtraction that
+                        // computes resultExp relies on that wrap-around
+                        // (assumes unchecked arithmetic - TODO confirm project setting).
+                        exp -= 1u;
+                    }
+                    else
+                    {
+                        fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+                    }
+                }
+
+                // Bits 44..51 of the fraction with bit 52 forced on, shifted down: 256..511.
+                uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+
+                uint resultExp = 253u - exp;
+
+                // Table index is scaled - 256 (0..255). Only the low 8 bits of estimate are
+                // used below.
+                uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u;
+
+                fraction = (ulong)(estimate & 0xFFu) << 44;
+
+                if (resultExp == 0u)
+                {
+                    // Exponent 0: set bit 52 and shift the fraction right by one.
+                    fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1;
+                }
+                else if (resultExp + 1u == 0u)
+                {
+                    // resultExp wrapped to all ones (i.e. -1): set bit 52, shift right by two
+                    // and force the exponent field to 0.
+                    fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2;
+                    resultExp = 0u;
+                }
+
+                // Pack sign (bit 31), low 8 bits of resultExp (bits 23..30) and the top 23
+                // fraction bits (fraction >> 29).
+                result = BitConverter.Int32BitsToSingle(
+                    (int)((sign ? 1u : 0u) << 31 | (resultExp & 0xFFu) << 23 | (uint)(fraction >> 29) & 0x007FFFFFu));
+            }
+
+            return result;
+        }
+
+        // Computes 2.0 + ((-value1) * value2). Infinity times zero yields +2.0, any other
+        // infinite operand yields a signed infinity. The general case is evaluated as a
+        // separate host multiply and add (not fused).
+        public static float FPRecipStepFused(float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPNeg();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+            float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return nanResult;
+            }
+
+            bool inf1  = type1 == FPType.Infinity;
+            bool inf2  = type2 == FPType.Infinity;
+            bool zero1 = type1 == FPType.Zero;
+            bool zero2 = type2 == FPType.Zero;
+
+            if ((inf1 && zero2) || (zero1 && inf2))
+            {
+                return FPTwo(false);
+            }
+
+            if (inf1 || inf2)
+            {
+                return FPInfinity(sign1 ^ sign2);
+            }
+
+            // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T);
+            // https://github.com/dotnet/corefx/issues/31903
+
+            float step = 2f + (value1 * value2);
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(step))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(step < 0f);
+            }
+
+            return step;
+        }
+
+        // Builds a value from the input's sign and its bitwise-inverted exponent field, with a
+        // zero fraction. An inverted exponent of 0xFF is replaced by 0xFE. NaN inputs go
+        // through FPProcessNaN instead.
+        public static float FPRecpX(float value)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value.FPUnpack(out FPType type, out bool sign, out uint op, context);
+
+            if (type == FPType.SNaN || type == FPType.QNaN)
+            {
+                return FPProcessNaN(type, op, context);
+            }
+
+            uint invertedExp = (~op >> 23) & 0xFFu;
+
+            if (invertedExp == 0xFFu)
+            {
+                invertedExp = 0xFEu;
+            }
+
+            uint signBit = sign ? 0x80000000u : 0u;
+
+            return BitConverter.Int32BitsToSingle((int)(signBit | (invertedExp << 23)));
+        }
+
+        // Computes a reciprocal square root estimate of value using
+        // SoftFloat.RecipSqrtEstimateTable. NaN, zero, negative and infinite inputs are
+        // handled explicitly before the table lookup.
+        public static float FPRSqrtEstimate(float value)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            // Only the classification outputs are used; the value returned by FPUnpack is discarded.
+            value.FPUnpack(out FPType type, out bool sign, out uint op, context);
+
+            float result;
+
+            if (type == FPType.SNaN || type == FPType.QNaN)
+            {
+                result = FPProcessNaN(type, op, context);
+            }
+            else if (type == FPType.Zero)
+            {
+                // Zero input: signed infinity plus a DivideByZero exception.
+                result = FPInfinity(sign);
+
+                FPProcessException(FPException.DivideByZero, context);
+            }
+            else if (sign)
+            {
+                // Any remaining negative input is an invalid operation.
+                result = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+            }
+            else if (type == FPType.Infinity)
+            {
+                result = FPZero(false);
+            }
+            else
+            {
+                // The 23 stored fraction bits are moved up to bits 29..51; exp is the 8-bit
+                // exponent field.
+                ulong fraction = (ulong)(op & 0x007FFFFFu) << 29;
+                uint exp = (op & 0x7F800000u) >> 23;
+
+                if (exp == 0u)
+                {
+                    // Zero exponent field: shift left until bit 51 is set, decrementing exp
+                    // each time. exp is unsigned, so it wraps below zero
+                    // (assumes unchecked arithmetic - TODO confirm project setting).
+                    while ((fraction & 0x0008000000000000ul) == 0ul)
+                    {
+                        fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+                        exp -= 1u;
+                    }
+
+                    // One final shift moves the bit found at position 51 out of the kept range.
+                    fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+                }
+
+                uint scaled;
+
+                if ((exp & 1u) == 0u)
+                {
+                    // Even exponent: bits 44..51 with bit 52 forced on, giving 256..511.
+                    scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+                }
+                else
+                {
+                    // Odd exponent: bits 45..51 with bit 52 forced on, giving 128..255.
+                    scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45);
+                }
+
+                uint resultExp = (380u - exp) >> 1;
+
+                // Table index is scaled - 128. Only the low 8 bits of estimate are used below.
+                uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u;
+
+                // Pack the low 8 bits of resultExp (bits 23..30) and of estimate (bits 15..22);
+                // the sign bit is left clear.
+                result = BitConverter.Int32BitsToSingle((int)((resultExp & 0xFFu) << 23 | (estimate & 0xFFu) << 15));
+            }
+
+            return result;
+        }
+
+        // Computes (3.0 + ((-value1) * value2)) / 2.0. Infinity times zero yields +1.5, any
+        // other infinite operand yields a signed infinity. The general case is evaluated with
+        // separate host operations (not fused).
+        public static float FPRSqrtStepFused(float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPNeg();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+            float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return nanResult;
+            }
+
+            bool inf1  = type1 == FPType.Infinity;
+            bool inf2  = type2 == FPType.Infinity;
+            bool zero1 = type1 == FPType.Zero;
+            bool zero2 = type2 == FPType.Zero;
+
+            if ((inf1 && zero2) || (zero1 && inf2))
+            {
+                return FPOnePointFive(false);
+            }
+
+            if (inf1 || inf2)
+            {
+                return FPInfinity(sign1 ^ sign2);
+            }
+
+            // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T);
+            // https://github.com/dotnet/corefx/issues/31903
+
+            float step = (3f + (value1 * value2)) / 2f;
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(step))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(step < 0f);
+            }
+
+            return step;
+        }
+
+        // Computes the square root of value. Zero keeps its sign, positive infinity is returned
+        // unchanged, and any other negative input is an invalid operation producing the
+        // default NaN.
+        public static float FPSqrt(float value)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value = value.FPUnpack(out FPType type, out bool sign, out uint op, context);
+
+            if (type == FPType.SNaN || type == FPType.QNaN)
+            {
+                return FPProcessNaN(type, op, context);
+            }
+
+            if (type == FPType.Zero)
+            {
+                return FPZero(sign);
+            }
+
+            if (type == FPType.Infinity && !sign)
+            {
+                return FPInfinity(sign);
+            }
+
+            if (sign)
+            {
+                float defaultNaN = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+
+                return defaultNaN;
+            }
+
+            float root = MathF.Sqrt(value);
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(root))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(root < 0f);
+            }
+
+            return root;
+        }
+
+        // Subtracts value2 from value1. Subtracting same-signed infinities is an invalid
+        // operation; other infinity and zero combinations are resolved directly, and the
+        // remaining cases use the host subtraction followed by an optional flush-to-zero step.
+        public static float FPSub(float value1, float value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context);
+
+            float nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return nanResult;
+            }
+
+            bool inf1  = type1 == FPType.Infinity;
+            bool inf2  = type2 == FPType.Infinity;
+            bool zero1 = type1 == FPType.Zero;
+            bool zero2 = type2 == FPType.Zero;
+
+            if (inf1 && inf2 && sign1 == sign2)
+            {
+                float defaultNaN = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+
+                return defaultNaN;
+            }
+
+            if ((inf1 && !sign1) || (inf2 && sign2))
+            {
+                return FPInfinity(false);
+            }
+
+            if ((inf1 && sign1) || (inf2 && !sign2))
+            {
+                return FPInfinity(true);
+            }
+
+            if (zero1 && zero2 && sign1 != sign2)
+            {
+                // Zeros of opposite sign: the result keeps the sign of the first operand.
+                return FPZero(sign1);
+            }
+
+            float difference = value1 - value2;
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(difference))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(difference < 0f);
+            }
+
+            return difference;
+        }
+
+        // Returns the default NaN: sign clear, exponent all ones, only the top fraction bit set
+        // (bit pattern 0x7FC00000).
+        private static float FPDefaultNaN()
+        {
+            // The pattern is spelled out instead of negating float.NaN, because that form only
+            // yields a sign-clear NaN if the float.NaN constant has its sign bit set.
+            return BitConverter.Int32BitsToSingle(0x7FC00000);
+        }
+
+        // Returns negative infinity when sign is true, positive infinity otherwise.
+        private static float FPInfinity(bool sign)
+        {
+            if (sign)
+            {
+                return float.NegativeInfinity;
+            }
+
+            return float.PositiveInfinity;
+        }
+
+        // Returns negative zero when sign is true, positive zero otherwise.
+        private static float FPZero(bool sign)
+        {
+            if (sign)
+            {
+                return -0f;
+            }
+
+            return +0f;
+        }
+
+        // Returns the finite value of largest magnitude: float.MinValue when sign is true,
+        // float.MaxValue otherwise.
+        private static float FPMaxNormal(bool sign)
+        {
+            if (sign)
+            {
+                return float.MinValue;
+            }
+
+            return float.MaxValue;
+        }
+
+        // Returns -2.0 when sign is true, +2.0 otherwise.
+        private static float FPTwo(bool sign)
+        {
+            if (sign)
+            {
+                return -2f;
+            }
+
+            return +2f;
+        }
+
+        // Returns -1.5 when sign is true, +1.5 otherwise.
+        private static float FPOnePointFive(bool sign)
+        {
+            if (sign)
+            {
+                return -1.5f;
+            }
+
+            return +1.5f;
+        }
+
+        // Extension helper that returns the arithmetic negation of value.
+        private static float FPNeg(this float value) => -value;
+
+        // Returns a float whose 32 bits are all set when ones is true, or all clear otherwise.
+        private static float ZerosOrOnes(bool ones)
+        {
+            int bits = 0;
+
+            if (ones)
+            {
+                bits = -1;
+            }
+
+            return BitConverter.Int32BitsToSingle(bits);
+        }
+
+        // Classifies value and exposes its sign and raw bits. A denormal input is classified
+        // as Zero (and InputDenorm is signalled) when FPCR.Fz is set, otherwise as Nonzero.
+        // The return value is the input itself, except for operands classified as Zero or NaN,
+        // which are returned as a zero carrying the input's sign.
+        private static float FPUnpack(
+            this float value,
+            out FPType type,
+            out bool sign,
+            out uint valueBits,
+            ExecutionContext context)
+        {
+            const uint SignMask     = 0x80000000u;
+            const uint ExponentMask = 0x7F800000u;
+            const uint FractionMask = 0x007FFFFFu;
+            const uint QuietBit     = 0x00400000u;
+
+            valueBits = (uint)BitConverter.SingleToInt32Bits(value);
+
+            sign = (valueBits & SignMask) != 0u;
+
+            uint exponent = valueBits & ExponentMask;
+            uint fraction = valueBits & FractionMask;
+
+            if (exponent == 0u)
+            {
+                if (fraction != 0u && (context.Fpcr & FPCR.Fz) == 0)
+                {
+                    // Denormal input with flush-to-zero disabled: kept as an ordinary number.
+                    type = FPType.Nonzero;
+
+                    return value;
+                }
+
+                type = FPType.Zero;
+
+                if (fraction != 0u)
+                {
+                    // A denormal is being flushed to zero.
+                    FPProcessException(FPException.InputDenorm, context);
+                }
+
+                return FPZero(sign);
+            }
+
+            if (exponent == ExponentMask)
+            {
+                if (fraction == 0u)
+                {
+                    type = FPType.Infinity;
+
+                    return value;
+                }
+
+                // The top fraction bit distinguishes quiet from signalling NaNs.
+                type = (valueBits & QuietBit) != 0u ? FPType.QNaN : FPType.SNaN;
+
+                return FPZero(sign);
+            }
+
+            type = FPType.Nonzero;
+
+            return value;
+        }
+
+        // Selects the NaN operand to propagate for a two-operand operation. Signalling NaNs
+        // take priority over quiet NaNs, and the first operand over the second. 'done' is
+        // true when a NaN was found; otherwise it is false and +0 is returned as a placeholder.
+        private static float FPProcessNaNs(
+            FPType type1,
+            FPType type2,
+            uint op1,
+            uint op2,
+            out bool done,
+            ExecutionContext context)
+        {
+            bool useFirst  = type1 == FPType.SNaN || (type2 != FPType.SNaN && type1 == FPType.QNaN);
+            bool useSecond = !useFirst && (type2 == FPType.SNaN || type2 == FPType.QNaN);
+
+            if (useFirst)
+            {
+                done = true;
+
+                return FPProcessNaN(type1, op1, context);
+            }
+
+            if (useSecond)
+            {
+                done = true;
+
+                return FPProcessNaN(type2, op2, context);
+            }
+
+            done = false;
+
+            return FPZero(false);
+        }
+
+        // Selects the NaN operand to propagate for a three-operand operation. Signalling NaNs
+        // take priority over quiet NaNs, and within each kind the operands are tested in
+        // order 1, 2, 3. 'done' is true when a NaN was found; otherwise it is false and +0 is
+        // returned as a placeholder.
+        private static float FPProcessNaNs3(
+            FPType type1,
+            FPType type2,
+            FPType type3,
+            uint op1,
+            uint op2,
+            uint op3,
+            out bool done,
+            ExecutionContext context)
+        {
+            FPType nanType;
+            uint   nanBits;
+
+            if (type1 == FPType.SNaN)
+            {
+                nanType = type1;
+                nanBits = op1;
+            }
+            else if (type2 == FPType.SNaN)
+            {
+                nanType = type2;
+                nanBits = op2;
+            }
+            else if (type3 == FPType.SNaN)
+            {
+                nanType = type3;
+                nanBits = op3;
+            }
+            else if (type1 == FPType.QNaN)
+            {
+                nanType = type1;
+                nanBits = op1;
+            }
+            else if (type2 == FPType.QNaN)
+            {
+                nanType = type2;
+                nanBits = op2;
+            }
+            else if (type3 == FPType.QNaN)
+            {
+                nanType = type3;
+                nanBits = op3;
+            }
+            else
+            {
+                // No NaN operand: the caller has to compute the result itself.
+                done = false;
+
+                return FPZero(false);
+            }
+
+            done = true;
+
+            return FPProcessNaN(nanType, nanBits, context);
+        }
+
+        // Produces the NaN result for a NaN operand with raw bits 'op'. A signalling NaN has
+        // bit 22 set and signals InvalidOp. When FPCR.Dn is set the default NaN is returned
+        // instead of the operand's bits.
+        private static float FPProcessNaN(FPType type, uint op, ExecutionContext context)
+        {
+            if (type == FPType.SNaN)
+            {
+                op |= 0x00400000u;
+
+                FPProcessException(FPException.InvalidOp, context);
+            }
+
+            bool useDefaultNaN = (context.Fpcr & FPCR.Dn) != 0;
+
+            return useDefaultNaN ? FPDefaultNaN() : BitConverter.Int32BitsToSingle((int)op);
+        }
+
+        // Records the floating-point exception 'exc'. If the FPCR bit at position
+        // ((int)exc + 8) is set, trapping is requested and NotImplementedException is thrown;
+        // otherwise the FPSR bit at position (int)exc is set.
+        private static void FPProcessException(FPException exc, ExecutionContext context)
+        {
+            int statusBit = (int)exc;
+
+            FPCR trapEnableMask = (FPCR)(1 << (statusBit + 8));
+
+            if ((context.Fpcr & trapEnableMask) != 0)
+            {
+                throw new NotImplementedException("Floating-point trap handling.");
+            }
+
+            context.Fpsr |= (FPSR)(1 << statusBit);
+        }
+    }
+
+    static class SoftFloat64
+    {
+        // Adds value1 and value2 (double precision). Adding infinities of opposite sign is an
+        // invalid operation; other infinity and zero combinations are resolved directly, and
+        // the remaining cases use the host addition followed by an optional flush-to-zero step.
+        public static double FPAdd(double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+            double nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return nanResult;
+            }
+
+            bool inf1  = type1 == FPType.Infinity;
+            bool inf2  = type2 == FPType.Infinity;
+            bool zero1 = type1 == FPType.Zero;
+            bool zero2 = type2 == FPType.Zero;
+
+            if (inf1 && inf2 && sign1 != sign2)
+            {
+                double defaultNaN = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+
+                return defaultNaN;
+            }
+
+            if ((inf1 && !sign1) || (inf2 && !sign2))
+            {
+                return FPInfinity(false);
+            }
+
+            if ((inf1 && sign1) || (inf2 && sign2))
+            {
+                return FPInfinity(true);
+            }
+
+            if (zero1 && zero2 && sign1 == sign2)
+            {
+                // Two zeros of the same sign keep that sign.
+                return FPZero(sign1);
+            }
+
+            double sum = value1 + value2;
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(sum))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(sum < 0d);
+            }
+
+            return sum;
+        }
+
+        // Compares value1 with value2 and returns a 4-bit code: 0b0011 when either operand is
+        // a NaN, 0b0110 when equal, 0b1000 when value1 is less, 0b0010 otherwise. InvalidOp is
+        // signalled for a signalling NaN, or for any NaN when signalNaNs is true.
+        public static int FPCompare(double value1, double value2, bool signalNaNs)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+            value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+            bool signalling1 = type1 == FPType.SNaN;
+            bool signalling2 = type2 == FPType.SNaN;
+
+            bool anyNaN = signalling1 || type1 == FPType.QNaN || signalling2 || type2 == FPType.QNaN;
+
+            if (anyNaN)
+            {
+                if (signalling1 || signalling2 || signalNaNs)
+                {
+                    FPProcessException(FPException.InvalidOp, context);
+                }
+
+                return 0b0011;
+            }
+
+            if (value1 == value2)
+            {
+                return 0b0110;
+            }
+
+            return value1 < value2 ? 0b1000 : 0b0010;
+        }
+
+        // Returns ZerosOrOnes(value1 == value2). When either operand is a NaN the result is
+        // ZerosOrOnes(false), and InvalidOp is signalled only for a signalling NaN.
+        public static double FPCompareEQ(double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+            value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+            bool signalling = type1 == FPType.SNaN || type2 == FPType.SNaN;
+            bool quiet      = type1 == FPType.QNaN || type2 == FPType.QNaN;
+
+            if (!signalling && !quiet)
+            {
+                return ZerosOrOnes(value1 == value2);
+            }
+
+            double unordered = ZerosOrOnes(false);
+
+            if (signalling)
+            {
+                FPProcessException(FPException.InvalidOp, context);
+            }
+
+            return unordered;
+        }
+
+        // Returns ZerosOrOnes(value1 >= value2). When either operand is a NaN (quiet or
+        // signalling) the result is ZerosOrOnes(false) and InvalidOp is signalled.
+        public static double FPCompareGE(double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+            value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+            bool nan1 = type1 == FPType.SNaN || type1 == FPType.QNaN;
+            bool nan2 = type2 == FPType.SNaN || type2 == FPType.QNaN;
+
+            if (!nan1 && !nan2)
+            {
+                return ZerosOrOnes(value1 >= value2);
+            }
+
+            double unordered = ZerosOrOnes(false);
+
+            FPProcessException(FPException.InvalidOp, context);
+
+            return unordered;
+        }
+
+        /// <summary>
+        /// Tests whether <paramref name="value1"/> is strictly greater than <paramref name="value2"/>,
+        /// returning an all-ones mask when true and an all-zeros mask when false.
+        /// </summary>
+        /// <remarks>
+        /// Any NaN operand (quiet or signaling) gives the all-zeros mask and reports InvalidOp.
+        /// </remarks>
+        public static double FPCompareGT(double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out _, out _, context);
+            value2 = value2.FPUnpack(out FPType type2, out _, out _, context);
+
+            bool ordered =
+                type1 != FPType.SNaN && type1 != FPType.QNaN &&
+                type2 != FPType.SNaN && type2 != FPType.QNaN;
+
+            if (ordered)
+            {
+                return ZerosOrOnes(value1 > value2);
+            }
+
+            double unordered = ZerosOrOnes(false);
+
+            FPProcessException(FPException.InvalidOp, context);
+
+            return unordered;
+        }
+
+        /// <summary>
+        /// Less-than-or-equal comparison, implemented as <see cref="FPCompareGE"/> with the operands swapped.
+        /// </summary>
+        public static double FPCompareLE(double value1, double value2) => FPCompareGE(value2, value1);
+
+        /// <summary>
+        /// Less-than comparison, implemented as <see cref="FPCompareGT"/> with the operands swapped.
+        /// </summary>
+        public static double FPCompareLT(double value1, double value2) => FPCompareGT(value2, value1);
+
+        /// <summary>
+        /// Divides <paramref name="value1"/> by <paramref name="value2"/> with explicit handling of NaN, infinity and zero operands.
+        /// </summary>
+        /// <remarks>
+        /// inf/inf and 0/0 give the default NaN and report InvalidOp. A finite non-zero value divided by zero gives a
+        /// signed infinity and reports DivideByZero. With FPCR.Fz set, a subnormal quotient is replaced by a signed zero
+        /// and the Ufc flag is set in FPSR.
+        /// </remarks>
+        public static double FPDiv(double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+            double nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return nanResult;
+            }
+
+            bool inf1  = type1 == FPType.Infinity;
+            bool inf2  = type2 == FPType.Infinity;
+            bool zero1 = type1 == FPType.Zero;
+            bool zero2 = type2 == FPType.Zero;
+
+            if ((inf1 && inf2) || (zero1 && zero2))
+            {
+                double invalid = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+
+                return invalid;
+            }
+
+            if (inf1 || zero2)
+            {
+                double infinite = FPInfinity(sign1 ^ sign2);
+
+                // Only a finite dividend over zero counts as a division by zero.
+                if (!inf1)
+                {
+                    FPProcessException(FPException.DivideByZero, context);
+                }
+
+                return infinite;
+            }
+
+            if (zero1 || inf2)
+            {
+                return FPZero(sign1 ^ sign2);
+            }
+
+            double quotient = value1 / value2;
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(quotient))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(quotient < 0d);
+            }
+
+            return quotient;
+        }
+
+        /// <summary>
+        /// Returns the larger of two doubles; NaN operands are resolved by <c>FPProcessNaNs</c>.
+        /// </summary>
+        /// <remarks>
+        /// The first operand is selected only when it compares strictly greater. When the selected operand is a zero,
+        /// the result is a negative zero only if both operands carry a negative sign.
+        /// </remarks>
+        public static double FPMax(double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+            double nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return nanResult;
+            }
+
+            bool firstWins = value1 > value2;
+
+            FPType chosenType = firstWins ? type1 : type2;
+            bool   chosenSign = firstWins ? sign1 : sign2;
+
+            if (chosenType == FPType.Infinity)
+            {
+                return FPInfinity(chosenSign);
+            }
+
+            if (chosenType == FPType.Zero)
+            {
+                return FPZero(sign1 && sign2);
+            }
+
+            if (firstWins)
+            {
+                return value1;
+            }
+
+            // The flush-to-zero check is applied on this path only (second operand selected).
+            if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(value2))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(value2 < 0d);
+            }
+
+            return value2;
+        }
+
+        /// <summary>
+        /// Maximum that prefers a number over a quiet NaN: when exactly one operand is a quiet NaN it is replaced
+        /// by negative infinity before delegating to <see cref="FPMax"/>.
+        /// </summary>
+        public static double FPMaxNum(double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            // Only the classification is needed here; FPMax unpacks the operands again.
+            value1.FPUnpack(out FPType type1, out _, out _, context);
+            value2.FPUnpack(out FPType type2, out _, out _, context);
+
+            bool quiet1 = type1 == FPType.QNaN;
+            bool quiet2 = type2 == FPType.QNaN;
+
+            if (quiet1 != quiet2)
+            {
+                if (quiet1)
+                {
+                    value1 = FPInfinity(true);
+                }
+                else
+                {
+                    value2 = FPInfinity(true);
+                }
+            }
+
+            return FPMax(value1, value2);
+        }
+
+        /// <summary>
+        /// Returns the smaller of two doubles; NaN operands are resolved by <c>FPProcessNaNs</c>.
+        /// </summary>
+        /// <remarks>
+        /// The first operand is selected only when it compares strictly less. When the selected operand is a zero,
+        /// the result is a negative zero if either operand carries a negative sign.
+        /// </remarks>
+        public static double FPMin(double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+            double nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return nanResult;
+            }
+
+            bool firstWins = value1 < value2;
+
+            FPType chosenType = firstWins ? type1 : type2;
+            bool   chosenSign = firstWins ? sign1 : sign2;
+
+            if (chosenType == FPType.Infinity)
+            {
+                return FPInfinity(chosenSign);
+            }
+
+            if (chosenType == FPType.Zero)
+            {
+                return FPZero(sign1 || sign2);
+            }
+
+            if (firstWins)
+            {
+                return value1;
+            }
+
+            // The flush-to-zero check is applied on this path only (second operand selected).
+            if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(value2))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(value2 < 0d);
+            }
+
+            return value2;
+        }
+
+        /// <summary>
+        /// Minimum that prefers a number over a quiet NaN: when exactly one operand is a quiet NaN it is replaced
+        /// by positive infinity before delegating to <see cref="FPMin"/>.
+        /// </summary>
+        public static double FPMinNum(double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            // Only the classification is needed here; FPMin unpacks the operands again.
+            value1.FPUnpack(out FPType type1, out _, out _, context);
+            value2.FPUnpack(out FPType type2, out _, out _, context);
+
+            bool quiet1 = type1 == FPType.QNaN;
+            bool quiet2 = type2 == FPType.QNaN;
+
+            if (quiet1 != quiet2)
+            {
+                if (quiet1)
+                {
+                    value1 = FPInfinity(false);
+                }
+                else
+                {
+                    value2 = FPInfinity(false);
+                }
+            }
+
+            return FPMin(value1, value2);
+        }
+
+        /// <summary>
+        /// Multiplies two doubles with explicit handling of NaN, infinity and zero operands.
+        /// </summary>
+        /// <remarks>
+        /// Infinity times zero gives the default NaN and reports InvalidOp. With FPCR.Fz set, a subnormal product is
+        /// replaced by a signed zero and the Ufc flag is set in FPSR.
+        /// </remarks>
+        public static double FPMul(double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+            double nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return nanResult;
+            }
+
+            bool inf1  = type1 == FPType.Infinity;
+            bool inf2  = type2 == FPType.Infinity;
+            bool zero1 = type1 == FPType.Zero;
+            bool zero2 = type2 == FPType.Zero;
+
+            if ((inf1 && zero2) || (zero1 && inf2))
+            {
+                double invalid = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+
+                return invalid;
+            }
+
+            if (inf1 || inf2)
+            {
+                return FPInfinity(sign1 ^ sign2);
+            }
+
+            if (zero1 || zero2)
+            {
+                return FPZero(sign1 ^ sign2);
+            }
+
+            double product = value1 * value2;
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(product))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(product < 0d);
+            }
+
+            return product;
+        }
+
+        /// <summary>
+        /// Fused multiply-add: computes <paramref name="valueA"/> + (<paramref name="value1"/> * <paramref name="value2"/>)
+        /// with a single rounding step.
+        /// </summary>
+        /// <param name="valueA">The addend.</param>
+        /// <param name="value1">The first factor.</param>
+        /// <param name="value2">The second factor.</param>
+        /// <returns>
+        /// The fused result. NaN operands are resolved by <c>FPProcessNaNs3</c>; an infinity-times-zero product or a
+        /// sum of opposite-signed infinities gives the default NaN and reports InvalidOp. With FPCR.Fz set, a
+        /// subnormal result is replaced by a signed zero and the Ufc flag is set in FPSR.
+        /// </returns>
+        public static double FPMulAdd(double valueA, double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context);
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1,    context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2,    context);
+
+            bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+            bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+            double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context);
+
+            // A quiet-NaN addend does not mask an invalid (infinity * zero) product:
+            // the result is overridden with the default NaN and InvalidOp is reported.
+            if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2)))
+            {
+                result = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+            }
+
+            if (!done)
+            {
+                bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero;
+
+                // Sign and class of the (conceptual) product value1 * value2.
+                bool signP = sign1 ^  sign2;
+                bool infP  = inf1  || inf2;
+                bool zeroP = zero1 || zero2;
+
+                if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP))
+                {
+                    result = FPDefaultNaN();
+
+                    FPProcessException(FPException.InvalidOp, context);
+                }
+                else if ((infA && !signA) || (infP && !signP))
+                {
+                    result = FPInfinity(false);
+                }
+                else if ((infA && signA) || (infP && signP))
+                {
+                    result = FPInfinity(true);
+                }
+                else if (zeroA && zeroP && signA == signP)
+                {
+                    result = FPZero(signA);
+                }
+                else
+                {
+                    // Round once over the exact product-plus-addend. A separate multiply followed by
+                    // an add rounds twice and can differ from a true fused operation in the last bit.
+                    result = Math.FusedMultiplyAdd(value1, value2, valueA);
+
+                    if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+                    {
+                        context.Fpsr |= FPSR.Ufc;
+
+                        result = FPZero(result < 0d);
+                    }
+                }
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Multiply-subtract: negates <paramref name="value1"/> and forwards to <see cref="FPMulAdd"/>,
+        /// i.e. valueA + ((-value1) * value2).
+        /// </summary>
+        public static double FPMulSub(double valueA, double value1, double value2)
+        {
+            double negatedFactor = value1.FPNeg();
+
+            return FPMulAdd(valueA, negatedFactor, value2);
+        }
+
+        /// <summary>
+        /// Extended multiply: behaves like a regular multiply except that infinity times zero gives 2.0
+        /// (carrying the product sign) instead of a NaN, and reports no exception for that case.
+        /// </summary>
+        /// <remarks>
+        /// With FPCR.Fz set, a subnormal product is replaced by a signed zero and the Ufc flag is set in FPSR.
+        /// </remarks>
+        public static double FPMulX(double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+            double nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return nanResult;
+            }
+
+            bool inf1  = type1 == FPType.Infinity;
+            bool inf2  = type2 == FPType.Infinity;
+            bool zero1 = type1 == FPType.Zero;
+            bool zero2 = type2 == FPType.Zero;
+
+            if ((inf1 && zero2) || (zero1 && inf2))
+            {
+                return FPTwo(sign1 ^ sign2);
+            }
+
+            if (inf1 || inf2)
+            {
+                return FPInfinity(sign1 ^ sign2);
+            }
+
+            if (zero1 || zero2)
+            {
+                return FPZero(sign1 ^ sign2);
+            }
+
+            double product = value1 * value2;
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(product))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(product < 0d);
+            }
+
+            return product;
+        }
+
+        /// <summary>
+        /// Produces a reciprocal estimate of <paramref name="value"/> from an 8-bit lookup in
+        /// <c>SoftFloat.RecipEstimateTable</c>.
+        /// </summary>
+        /// <remarks>
+        /// NaNs are routed through <c>FPProcessNaN</c>; an infinity gives a signed zero; a zero gives a signed
+        /// infinity and reports DivideByZero; a magnitude below 2^-1024 reports Overflow and Inexact and gives
+        /// either infinity or the largest finite value depending on the rounding mode.
+        /// </remarks>
+        /// <param name="value">The value whose reciprocal is estimated.</param>
+        /// <returns>The estimate, assembled directly from sign, exponent and fraction bit fields.</returns>
+        public static double FPRecipEstimate(double value)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            // Only the classification, sign and raw bits are used; the value returned by FPUnpack is discarded.
+            value.FPUnpack(out FPType type, out bool sign, out ulong op, context);
+
+            double result;
+
+            if (type == FPType.SNaN || type == FPType.QNaN)
+            {
+                result = FPProcessNaN(type, op, context);
+            }
+            else if (type == FPType.Infinity)
+            {
+                result = FPZero(sign);
+            }
+            else if (type == FPType.Zero)
+            {
+                result = FPInfinity(sign);
+
+                FPProcessException(FPException.DivideByZero, context);
+            }
+            else if (Math.Abs(value) < Math.Pow(2d, -1024))
+            {
+                // The rounding mode decides whether the overflow lands on infinity or on the largest finite value.
+                bool overflowToInf;
+
+                switch (context.Fpcr.GetRoundingMode())
+                {
+                    default:
+                    case FPRoundingMode.ToNearest:            overflowToInf = true;  break;
+                    case FPRoundingMode.TowardsPlusInfinity:  overflowToInf = !sign; break;
+                    case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign;  break;
+                    case FPRoundingMode.TowardsZero:          overflowToInf = false; break;
+                }
+
+                result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+                FPProcessException(FPException.Overflow, context);
+                FPProcessException(FPException.Inexact,  context);
+            }
+            else if ((context.Fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022)))
+            {
+                // With flush-to-zero enabled, inputs of magnitude 2^1022 or more give a signed zero and set Ufc
+                // (presumably because the reciprocal would be subnormal).
+                result = FPZero(sign);
+
+                context.Fpsr |= FPSR.Ufc;
+            }
+            else
+            {
+                ulong fraction = op & 0x000FFFFFFFFFFFFFul;
+                uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52);
+
+                // Zero exponent field: shift the fraction left so its leading one moves out of the fraction field.
+                // When bit 51 is clear a two-bit shift is used and exp wraps around to 0xFFFFFFFF (i.e. -1).
+                if (exp == 0u)
+                {
+                    if ((fraction & 0x0008000000000000ul) == 0ul)
+                    {
+                        fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
+                        exp -= 1u;
+                    }
+                    else
+                    {
+                        fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+                    }
+                }
+
+                // Top 8 fraction bits with the implicit leading one re-attached: a value in [256, 511].
+                uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+
+                // Unsigned arithmetic: relies on wrap-around when exp is 0xFFFFFFFF or exceeds 2045.
+                uint resultExp = 2045u - exp;
+
+                // Table index is scaled - 256, i.e. [0, 255]; only the low 8 bits of the estimate are kept below.
+                uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u;
+
+                fraction = (ulong)(estimate & 0xFFu) << 44;
+
+                // Result exponent of 0 or -1 (wrapped): shift the fraction right by one or two bits
+                // with an explicit leading one, and encode with a zero exponent field.
+                if (resultExp == 0u)
+                {
+                    fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1;
+                }
+                else if (resultExp + 1u == 0u)
+                {
+                    fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2;
+                    resultExp = 0u;
+                }
+
+                // Reassemble sign (bit 63), exponent (bits 62..52) and fraction (bits 51..0).
+                result = BitConverter.Int64BitsToDouble(
+                    (long)((sign ? 1ul : 0ul) << 63 | (resultExp & 0x7FFul) << 52 | (fraction & 0x000FFFFFFFFFFFFFul)));
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Reciprocal step: computes 2.0 - (<paramref name="value1"/> * <paramref name="value2"/>) as a single
+        /// fused operation (the first operand is negated, then multiplied and added to 2.0 with one rounding).
+        /// </summary>
+        /// <param name="value1">First factor (negated internally).</param>
+        /// <param name="value2">Second factor.</param>
+        /// <returns>
+        /// The fused result. Infinity times zero gives +2.0; any other infinite operand gives a signed infinity.
+        /// With FPCR.Fz set, a subnormal result is replaced by a signed zero and the Ufc flag is set in FPSR.
+        /// </returns>
+        public static double FPRecipStepFused(double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPNeg();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+            double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (!done)
+            {
+                bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+                bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+                if ((inf1 && zero2) || (zero1 && inf2))
+                {
+                    result = FPTwo(false);
+                }
+                else if (inf1 || inf2)
+                {
+                    result = FPInfinity(sign1 ^ sign2);
+                }
+                else
+                {
+                    // Round once over the exact value of 2 + value1 * value2; a separate multiply
+                    // and add rounds twice and can differ from a true fused operation in the last bit.
+                    result = Math.FusedMultiplyAdd(value1, value2, 2d);
+
+                    if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+                    {
+                        context.Fpsr |= FPSR.Ufc;
+
+                        result = FPZero(result < 0d);
+                    }
+                }
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Reciprocal exponent: returns a value with the input's sign, a bitwise-inverted exponent field and a zero fraction.
+        /// </summary>
+        /// <remarks>
+        /// When the inverted exponent would be all ones (input exponent field of zero), 0x7FE is used instead.
+        /// NaN inputs are routed through <c>FPProcessNaN</c>.
+        /// </remarks>
+        public static double FPRecpX(double value)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value.FPUnpack(out FPType type, out bool sign, out ulong op, context);
+
+            if (type == FPType.SNaN || type == FPType.QNaN)
+            {
+                return FPProcessNaN(type, op, context);
+            }
+
+            const ulong ExponentOnes = 0x7FFul;
+            const ulong LargestExp   = 0x7FEul;
+
+            ulong invertedExp = (~op >> 52) & ExponentOnes;
+
+            if (invertedExp == ExponentOnes)
+            {
+                invertedExp = LargestExp;
+            }
+
+            ulong signBit = sign ? 1ul << 63 : 0ul;
+
+            return BitConverter.Int64BitsToDouble((long)(signBit | (invertedExp << 52)));
+        }
+
+        /// <summary>
+        /// Produces a reciprocal square-root estimate of <paramref name="value"/> from a lookup in
+        /// <c>SoftFloat.RecipSqrtEstimateTable</c>.
+        /// </summary>
+        /// <remarks>
+        /// NaNs are routed through <c>FPProcessNaN</c>; a zero gives a signed infinity and reports DivideByZero;
+        /// any other negative input gives the default NaN and reports InvalidOp; positive infinity gives +0.
+        /// </remarks>
+        /// <param name="value">The value whose reciprocal square root is estimated.</param>
+        /// <returns>The estimate, assembled directly from exponent and fraction bit fields (sign bit clear).</returns>
+        public static double FPRSqrtEstimate(double value)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            // Only the classification, sign and raw bits are used; the value returned by FPUnpack is discarded.
+            value.FPUnpack(out FPType type, out bool sign, out ulong op, context);
+
+            double result;
+
+            if (type == FPType.SNaN || type == FPType.QNaN)
+            {
+                result = FPProcessNaN(type, op, context);
+            }
+            else if (type == FPType.Zero)
+            {
+                result = FPInfinity(sign);
+
+                FPProcessException(FPException.DivideByZero, context);
+            }
+            else if (sign)
+            {
+                // Zeros were handled above, so this is a negative non-zero (or negative infinity) input.
+                result = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+            }
+            else if (type == FPType.Infinity)
+            {
+                result = FPZero(false);
+            }
+            else
+            {
+                ulong fraction = op & 0x000FFFFFFFFFFFFFul;
+                uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52);
+
+                // Zero exponent field: shift left until bit 51 is set, decrementing exp each time
+                // (unsigned, so it wraps below zero), then shift once more to drop that leading one.
+                if (exp == 0u)
+                {
+                    while ((fraction & 0x0008000000000000ul) == 0ul)
+                    {
+                        fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+                        exp -= 1u;
+                    }
+
+                    fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+                }
+
+                uint scaled;
+
+                // Exponent parity selects the table half: even exponents use the top 8 fraction bits plus the
+                // implicit one (256..511), odd exponents use the top 7 bits plus the implicit one (128..255).
+                if ((exp & 1u) == 0u)
+                {
+                    scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+                }
+                else
+                {
+                    scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45);
+                }
+
+                // Unsigned arithmetic: a wrapped (negative) exp still yields the intended small sum.
+                uint resultExp = (3068u - exp) >> 1;
+
+                // Table index is scaled - 128, i.e. [0, 383]; only the low 8 bits of the estimate are kept below.
+                uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u;
+
+                // Exponent goes to bits 62..52 and the 8 estimate bits to the top of the fraction; sign bit stays clear.
+                result = BitConverter.Int64BitsToDouble((long)((resultExp & 0x7FFul) << 52 | (estimate & 0xFFul) << 44));
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Reciprocal square-root step: computes (3.0 - (<paramref name="value1"/> * <paramref name="value2"/>)) / 2.0,
+        /// with the multiply and add performed as a single fused operation.
+        /// </summary>
+        /// <param name="value1">First factor (negated internally).</param>
+        /// <param name="value2">Second factor.</param>
+        /// <returns>
+        /// The step result. Infinity times zero gives +1.5; any other infinite operand gives a signed infinity.
+        /// With FPCR.Fz set, a subnormal result is replaced by a signed zero and the Ufc flag is set in FPSR.
+        /// </returns>
+        public static double FPRSqrtStepFused(double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPNeg();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+            double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (!done)
+            {
+                bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero;
+                bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero;
+
+                if ((inf1 && zero2) || (zero1 && inf2))
+                {
+                    result = FPOnePointFive(false);
+                }
+                else if (inf1 || inf2)
+                {
+                    result = FPInfinity(sign1 ^ sign2);
+                }
+                else
+                {
+                    // Round once over the exact value of 3 + value1 * value2 before halving; a separate
+                    // multiply and add rounds twice and can differ from a true fused operation in the last bit.
+                    result = Math.FusedMultiplyAdd(value1, value2, 3d) / 2d;
+
+                    if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result))
+                    {
+                        context.Fpsr |= FPSR.Ufc;
+
+                        result = FPZero(result < 0d);
+                    }
+                }
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Square root with explicit handling of NaN, zero, infinity and negative inputs.
+        /// </summary>
+        /// <remarks>
+        /// A zero keeps its sign; positive infinity is returned unchanged; any other negative input gives the
+        /// default NaN and reports InvalidOp. With FPCR.Fz set, a subnormal result is replaced by a signed zero
+        /// and the Ufc flag is set in FPSR.
+        /// </remarks>
+        public static double FPSqrt(double value)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context);
+
+            if (type == FPType.SNaN || type == FPType.QNaN)
+            {
+                return FPProcessNaN(type, op, context);
+            }
+
+            if (type == FPType.Zero)
+            {
+                return FPZero(sign);
+            }
+
+            if (sign)
+            {
+                double invalid = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+
+                return invalid;
+            }
+
+            // From here on the sign is known to be clear.
+            if (type == FPType.Infinity)
+            {
+                return FPInfinity(sign);
+            }
+
+            double root = Math.Sqrt(value);
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(root))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(root < 0d);
+            }
+
+            return root;
+        }
+
+        /// <summary>
+        /// Subtracts <paramref name="value2"/> from <paramref name="value1"/> with explicit handling of NaN,
+        /// infinity and zero operands.
+        /// </summary>
+        /// <remarks>
+        /// Subtracting same-signed infinities gives the default NaN and reports InvalidOp. Two zeros of opposite
+        /// sign give a zero with the sign of <paramref name="value1"/>. With FPCR.Fz set, a subnormal result is
+        /// replaced by a signed zero and the Ufc flag is set in FPSR.
+        /// </remarks>
+        public static double FPSub(double value1, double value2)
+        {
+            ExecutionContext context = NativeInterface.GetContext();
+
+            value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context);
+            value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context);
+
+            double nanResult = FPProcessNaNs(type1, type2, op1, op2, out bool done, context);
+
+            if (done)
+            {
+                return nanResult;
+            }
+
+            bool inf1  = type1 == FPType.Infinity;
+            bool inf2  = type2 == FPType.Infinity;
+            bool zero1 = type1 == FPType.Zero;
+            bool zero2 = type2 == FPType.Zero;
+
+            if (inf1 && inf2 && sign1 == sign2)
+            {
+                double invalid = FPDefaultNaN();
+
+                FPProcessException(FPException.InvalidOp, context);
+
+                return invalid;
+            }
+
+            if ((inf1 && !sign1) || (inf2 && sign2))
+            {
+                return FPInfinity(false);
+            }
+
+            if ((inf1 && sign1) || (inf2 && !sign2))
+            {
+                return FPInfinity(true);
+            }
+
+            if (zero1 && zero2 && sign1 != sign2)
+            {
+                return FPZero(sign1);
+            }
+
+            double difference = value1 - value2;
+
+            if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(difference))
+            {
+                context.Fpsr |= FPSR.Ufc;
+
+                return FPZero(difference < 0d);
+            }
+
+            return difference;
+        }
+
+        /// <summary>
+        /// Returns the default quiet NaN: sign bit clear, exponent all ones, and only the top fraction bit set
+        /// (bit pattern 0x7FF8000000000000).
+        /// </summary>
+        private static double FPDefaultNaN()
+        {
+            // Built from the explicit bit pattern instead of negating double.NaN, so the sign and
+            // payload do not depend on how the runtime happens to encode double.NaN.
+            return BitConverter.Int64BitsToDouble(0x7FF8000000000000L);
+        }
+
+        /// <summary>
+        /// Returns negative infinity when <paramref name="sign"/> is true, positive infinity otherwise.
+        /// </summary>
+        private static double FPInfinity(bool sign)
+        {
+            if (sign)
+            {
+                return double.NegativeInfinity;
+            }
+
+            return double.PositiveInfinity;
+        }
+
+        /// <summary>
+        /// Returns negative zero when <paramref name="sign"/> is true, positive zero otherwise.
+        /// </summary>
+        private static double FPZero(bool sign)
+        {
+            // A zero is all-zero bits except, for the negative one, the sign bit (bit 63).
+            long bits = sign ? long.MinValue : 0L;
+
+            return BitConverter.Int64BitsToDouble(bits);
+        }
+
+        /// <summary>
+        /// Returns the finite double of largest magnitude, negative when <paramref name="sign"/> is true.
+        /// </summary>
+        private static double FPMaxNormal(bool sign)
+        {
+            if (sign)
+            {
+                return double.MinValue;
+            }
+
+            return double.MaxValue;
+        }
+
+        /// <summary>
+        /// Returns -2.0 when <paramref name="sign"/> is true, +2.0 otherwise.
+        /// </summary>
+        private static double FPTwo(bool sign)
+        {
+            const double Magnitude = 2d;
+
+            if (sign)
+            {
+                return -Magnitude;
+            }
+
+            return Magnitude;
+        }
+
+        /// <summary>
+        /// Returns -1.5 when <paramref name="sign"/> is true, +1.5 otherwise.
+        /// </summary>
+        private static double FPOnePointFive(bool sign)
+        {
+            const double Magnitude = 1.5d;
+
+            if (sign)
+            {
+                return -Magnitude;
+            }
+
+            return Magnitude;
+        }
+
+        /// <summary>
+        /// Returns the arithmetic negation of <paramref name="value"/>.
+        /// </summary>
+        private static double FPNeg(this double value) => -value;
+
+        /// <summary>
+        /// Returns a double whose 64 bits are all ones when <paramref name="ones"/> is true, or all zeros otherwise.
+        /// </summary>
+        private static double ZerosOrOnes(bool ones)
+        {
+            long mask = 0L;
+
+            if (ones)
+            {
+                mask = ~mask;
+            }
+
+            return BitConverter.Int64BitsToDouble(mask);
+        }
+
+        /// <summary>
+        /// Classifies a double and extracts its sign and raw bit pattern.
+        /// </summary>
+        /// <param name="value">The value to classify.</param>
+        /// <param name="type">Receives the class of the value (zero, non-zero, infinity, quiet NaN or signaling NaN).</param>
+        /// <param name="sign">Receives true when the sign bit is set.</param>
+        /// <param name="valueBits">Receives the unmodified 64-bit encoding of <paramref name="value"/>.</param>
+        /// <param name="context">Execution context whose FPCR is consulted and which receives exception flags.</param>
+        /// <returns>
+        /// The input value, except that NaNs and values classified as zero (including subnormals when FPCR.Fz is
+        /// set) are returned as a zero carrying the input sign.
+        /// </returns>
+        private static double FPUnpack(
+            this double value,
+            out FPType type,
+            out bool sign,
+            out ulong valueBits,
+            ExecutionContext context)
+        {
+            const ulong SignMask     = 0x8000000000000000ul;
+            const ulong ExponentMask = 0x7FF0000000000000ul;
+            const ulong FractionMask = 0x000FFFFFFFFFFFFFul;
+            const ulong QuietBit     = 0x0008000000000000ul;
+
+            valueBits = (ulong)BitConverter.DoubleToInt64Bits(value);
+
+            sign = (valueBits & SignMask) != 0ul;
+
+            ulong exponent = valueBits & ExponentMask;
+            ulong fraction = valueBits & FractionMask;
+
+            if (exponent == ExponentMask)
+            {
+                if (fraction == 0ul)
+                {
+                    type = FPType.Infinity;
+
+                    return value;
+                }
+
+                // The top fraction bit distinguishes quiet from signaling NaNs.
+                type = (fraction & QuietBit) != 0ul ? FPType.QNaN : FPType.SNaN;
+
+                return FPZero(sign);
+            }
+
+            if (exponent == 0ul && (fraction == 0ul || (context.Fpcr & FPCR.Fz) != 0))
+            {
+                type = FPType.Zero;
+
+                // A non-zero fraction here means a subnormal input was flushed to zero.
+                if (fraction != 0ul)
+                {
+                    FPProcessException(FPException.InputDenorm, context);
+                }
+
+                return FPZero(sign);
+            }
+
+            type = FPType.Nonzero;
+
+            return value;
+        }
+
+        /// <summary>
+        /// Selects and processes the NaN operand of a two-operand operation, if there is one.
+        /// </summary>
+        /// <remarks>
+        /// Priority order: signaling NaN in operand 1, signaling NaN in operand 2, quiet NaN in operand 1,
+        /// quiet NaN in operand 2.
+        /// </remarks>
+        /// <param name="type1">Class of the first operand.</param>
+        /// <param name="type2">Class of the second operand.</param>
+        /// <param name="op1">Raw bits of the first operand.</param>
+        /// <param name="op2">Raw bits of the second operand.</param>
+        /// <param name="done">Receives true when a NaN was found and the returned value is the final result.</param>
+        /// <param name="context">Execution context passed through to <c>FPProcessNaN</c>.</param>
+        /// <returns>The processed NaN, or +0.0 as a placeholder when neither operand is a NaN.</returns>
+        private static double FPProcessNaNs(
+            FPType type1,
+            FPType type2,
+            ulong op1,
+            ulong op2,
+            out bool done,
+            ExecutionContext context)
+        {
+            FPType chosenType;
+            ulong  chosenOp;
+
+            if (type1 == FPType.SNaN)
+            {
+                chosenType = type1;
+                chosenOp   = op1;
+            }
+            else if (type2 == FPType.SNaN)
+            {
+                chosenType = type2;
+                chosenOp   = op2;
+            }
+            else if (type1 == FPType.QNaN)
+            {
+                chosenType = type1;
+                chosenOp   = op1;
+            }
+            else if (type2 == FPType.QNaN)
+            {
+                chosenType = type2;
+                chosenOp   = op2;
+            }
+            else
+            {
+                done = false;
+
+                return FPZero(false);
+            }
+
+            done = true;
+
+            return FPProcessNaN(chosenType, chosenOp, context);
+        }
+
+        /// <summary>
+        /// Selects and processes the NaN operand of a three-operand operation, if there is one.
+        /// </summary>
+        /// <remarks>
+        /// Signaling NaNs take priority over quiet NaNs; within each kind the operands are checked in order 1, 2, 3.
+        /// </remarks>
+        /// <param name="type1">Class of the first operand.</param>
+        /// <param name="type2">Class of the second operand.</param>
+        /// <param name="type3">Class of the third operand.</param>
+        /// <param name="op1">Raw bits of the first operand.</param>
+        /// <param name="op2">Raw bits of the second operand.</param>
+        /// <param name="op3">Raw bits of the third operand.</param>
+        /// <param name="done">Receives true when a NaN was found and the returned value is the final result.</param>
+        /// <param name="context">Execution context passed through to <c>FPProcessNaN</c>.</param>
+        /// <returns>The processed NaN, or +0.0 as a placeholder when no operand is a NaN.</returns>
+        private static double FPProcessNaNs3(
+            FPType type1,
+            FPType type2,
+            FPType type3,
+            ulong op1,
+            ulong op2,
+            ulong op3,
+            out bool done,
+            ExecutionContext context)
+        {
+            FPType chosenType;
+            ulong  chosenOp;
+
+            if (type1 == FPType.SNaN)
+            {
+                chosenType = type1;
+                chosenOp   = op1;
+            }
+            else if (type2 == FPType.SNaN)
+            {
+                chosenType = type2;
+                chosenOp   = op2;
+            }
+            else if (type3 == FPType.SNaN)
+            {
+                chosenType = type3;
+                chosenOp   = op3;
+            }
+            else if (type1 == FPType.QNaN)
+            {
+                chosenType = type1;
+                chosenOp   = op1;
+            }
+            else if (type2 == FPType.QNaN)
+            {
+                chosenType = type2;
+                chosenOp   = op2;
+            }
+            else if (type3 == FPType.QNaN)
+            {
+                chosenType = type3;
+                chosenOp   = op3;
+            }
+            else
+            {
+                done = false;
+
+                return FPZero(false);
+            }
+
+            done = true;
+
+            return FPProcessNaN(chosenType, chosenOp, context);
+        }
+
+        /// <summary>
+        /// Produces the result for a single NaN operand.
+        /// </summary>
+        /// <remarks>
+        /// A signaling NaN is quieted (bit 51 set) and reports InvalidOp. When FPCR.Dn is set the default NaN is
+        /// returned instead of the (possibly quieted) input bits.
+        /// </remarks>
+        /// <param name="type">Class of the operand; only <c>FPType.SNaN</c> is treated specially.</param>
+        /// <param name="op">Raw bits of the NaN operand.</param>
+        /// <param name="context">Execution context whose FPCR is consulted and which receives exception flags.</param>
+        private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context)
+        {
+            const ulong QuietBit = 1ul << 51;
+
+            ulong bits = op;
+
+            if (type == FPType.SNaN)
+            {
+                bits |= QuietBit;
+
+                FPProcessException(FPException.InvalidOp, context);
+            }
+
+            bool useDefaultNaN = (context.Fpcr & FPCR.Dn) != 0;
+
+            return useDefaultNaN ? FPDefaultNaN() : BitConverter.Int64BitsToDouble((long)bits);
+        }
+
+        /// <summary>
+        /// Records a floating-point exception in FPSR, or throws when the corresponding trap is enabled in FPCR.
+        /// </summary>
+        /// <remarks>
+        /// The trap-enable bit tested in FPCR sits 8 bits above the exception's own bit index; the FPSR flag
+        /// that gets set is at the exception's bit index.
+        /// </remarks>
+        /// <param name="exc">The exception to process; its numeric value is used as a bit index.</param>
+        /// <param name="context">Execution context holding FPCR and FPSR.</param>
+        /// <exception cref="NotImplementedException">The trap for <paramref name="exc"/> is enabled in FPCR.</exception>
+        private static void FPProcessException(FPException exc, ExecutionContext context)
+        {
+            int flagBit = (int)exc;
+
+            FPCR trapEnable = (FPCR)(1 << (flagBit + 8));
+
+            if ((context.Fpcr & trapEnable) == 0)
+            {
+                context.Fpsr |= (FPSR)(1 << flagBit);
+
+                return;
+            }
+
+            throw new NotImplementedException("Floating-point trap handling.");
+        }
+    }
+}
diff --git a/ARMeilleure/IntermediateRepresentation/BasicBlock.cs b/ARMeilleure/IntermediateRepresentation/BasicBlock.cs
new file mode 100644
index 000000000..06839f309
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/BasicBlock.cs
@@ -0,0 +1,83 @@
+using System.Collections.Generic;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+    class BasicBlock // Holds a list of IR nodes plus links to successor and predecessor blocks.
+    {
+        public int Index { get; set; }
+        // List node associated with this block; this class only stores it.
+        public LinkedListNode<BasicBlock> Node { get; set; }
+        // Nodes contained in this block, in order.
+        public LinkedList<Node> Operations { get; }
+        // Backing fields for the Next and Branch successor links.
+        private BasicBlock _next;
+        private BasicBlock _branch;
+        // Assigning a successor also updates the Predecessors lists of the old and new targets.
+        public BasicBlock Next
+        {
+            get => _next;
+            set => _next = AddSuccessor(_next, value);
+        }
+
+        public BasicBlock Branch
+        {
+            get => _branch;
+            set => _branch = AddSuccessor(_branch, value);
+        }
+        // Updated by the Next/Branch setters of the blocks that point at this block.
+        public List<BasicBlock> Predecessors { get; }
+        // Dominance information; only storage is provided here, nothing in this class fills it in.
+        public HashSet<BasicBlock> DominanceFrontiers { get; }
+
+        public BasicBlock ImmediateDominator { get; set; }
+        // Creates an empty block whose Index is -1.
+        public BasicBlock()
+        {
+            Operations = new LinkedList<Node>();
+
+            Predecessors = new List<BasicBlock>();
+
+            DominanceFrontiers = new HashSet<BasicBlock>();
+
+            Index = -1;
+        }
+        // Creates an empty block with the given index.
+        public BasicBlock(int index) : this()
+        {
+            Index = index;
+        }
+        // Moves this block's predecessor registration from oldBlock to newBlock; either may be null.
+        private BasicBlock AddSuccessor(BasicBlock oldBlock, BasicBlock newBlock)
+        {
+            oldBlock?.Predecessors.Remove(this);
+            newBlock?.Predecessors.Add(this);
+
+            return newBlock;
+        }
+        // Inserts before the last node when Branch is set or the block is a non-empty leaf; appends otherwise.
+        public void Append(Node node)
+        {
+            // If the branch block is not null, then the list of operations
+            // should end with a branch instruction. We insert the new operation
+            // before this branch.
+            if (_branch != null || (Operations.Last != null && IsLeafBlock()))
+            {
+                Operations.AddBefore(Operations.Last, node); // NOTE(review): throws if _branch is set while Operations is empty.
+            }
+            else
+            {
+                Operations.AddLast(node);
+            }
+        }
+        // True when the block has neither a Next nor a Branch successor.
+        private bool IsLeafBlock()
+        {
+            return _branch == null && _next == null;
+        }
+        // Returns the last node of the block, or null when the block is empty.
+        public Node GetLastOp()
+        {
+            return Operations.Last?.Value;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Instruction.cs b/ARMeilleure/IntermediateRepresentation/Instruction.cs
new file mode 100644
index 000000000..4c4ecb8f2
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/Instruction.cs
@@ -0,0 +1,79 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+    enum Instruction // Opcode stored in Operation.Instruction.
+    {
+        Add,
+        BitwiseAnd,
+        BitwiseExclusiveOr,
+        BitwiseNot,
+        BitwiseOr,
+        Branch,
+        BranchIfFalse,
+        BranchIfTrue,
+        ByteSwap,
+        Call,
+        CompareAndSwap128,
+        CompareEqual,
+        CompareGreater,
+        CompareGreaterOrEqual,
+        CompareGreaterOrEqualUI,
+        CompareGreaterUI,
+        CompareLess,
+        CompareLessOrEqual,
+        CompareLessOrEqualUI,
+        CompareLessUI,
+        CompareNotEqual,
+        ConditionalSelect,
+        ConvertI64ToI32,
+        ConvertToFP,
+        ConvertToFPUI,
+        Copy, // Operation.TurnIntoCopy rewrites an operation to this instruction.
+        CountLeadingZeros,
+        Divide,
+        DivideUI,
+        Load,
+        Load16,
+        Load8,
+        LoadArgument,
+        Multiply,
+        Multiply64HighSI,
+        Multiply64HighUI,
+        Negate,
+        Return,
+        RotateRight,
+        ShiftLeft,
+        ShiftRightSI,
+        ShiftRightUI,
+        SignExtend16,
+        SignExtend32,
+        SignExtend8,
+        StackAlloc,
+        Store,
+        Store16,
+        Store8,
+        Subtract,
+        VectorCreateScalar,
+        VectorExtract,
+        VectorExtract16,
+        VectorExtract8,
+        VectorInsert,
+        VectorInsert16,
+        VectorInsert8,
+        VectorOne,
+        VectorZero,
+        VectorZeroUpper64,
+        VectorZeroUpper96,
+        ZeroExtend16,
+        ZeroExtend32,
+        ZeroExtend8,
+        // NOTE(review): the entries below are kept as a separate group; presumably lower-level operations — confirm.
+        Clobber,
+        CpuId,
+        Extended, // Instruction value that IntrinsicOperation passes to its base constructor.
+        Fill,
+        LoadFromContext,
+        Spill,
+        SpillArg,
+        StoreToContext
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
new file mode 100644
index 000000000..1fe29e855
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs
@@ -0,0 +1,138 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+    enum Intrinsic // Value carried by IntrinsicOperation.Intrinsic; every member is named after an x86 instruction.
+    {
+        X86Addpd,
+        X86Addps,
+        X86Addsd,
+        X86Addss,
+        X86Andnpd,
+        X86Andnps,
+        X86Cmppd,
+        X86Cmpps,
+        X86Cmpsd,
+        X86Cmpss,
+        X86Comisdeq,
+        X86Comisdge,
+        X86Comisdlt,
+        X86Comisseq,
+        X86Comissge,
+        X86Comisslt,
+        X86Cvtdq2pd,
+        X86Cvtdq2ps,
+        X86Cvtpd2dq,
+        X86Cvtpd2ps,
+        X86Cvtps2dq,
+        X86Cvtps2pd,
+        X86Cvtsd2si,
+        X86Cvtsd2ss,
+        X86Cvtss2sd,
+        X86Divpd,
+        X86Divps,
+        X86Divsd,
+        X86Divss,
+        X86Haddpd,
+        X86Haddps,
+        X86Maxpd,
+        X86Maxps,
+        X86Maxsd,
+        X86Maxss,
+        X86Minpd,
+        X86Minps,
+        X86Minsd,
+        X86Minss,
+        X86Movhlps,
+        X86Movlhps,
+        X86Mulpd,
+        X86Mulps,
+        X86Mulsd,
+        X86Mulss,
+        X86Paddb,
+        X86Paddd,
+        X86Paddq,
+        X86Paddw,
+        X86Pand,
+        X86Pandn,
+        X86Pavgb,
+        X86Pavgw,
+        X86Pblendvb,
+        X86Pcmpeqb,
+        X86Pcmpeqd,
+        X86Pcmpeqq,
+        X86Pcmpeqw,
+        X86Pcmpgtb,
+        X86Pcmpgtd,
+        X86Pcmpgtq,
+        X86Pcmpgtw,
+        X86Pmaxsb,
+        X86Pmaxsd,
+        X86Pmaxsw,
+        X86Pmaxub,
+        X86Pmaxud,
+        X86Pmaxuw,
+        X86Pminsb,
+        X86Pminsd,
+        X86Pminsw,
+        X86Pminub,
+        X86Pminud,
+        X86Pminuw,
+        X86Pmovsxbw,
+        X86Pmovsxdq,
+        X86Pmovsxwd,
+        X86Pmovzxbw,
+        X86Pmovzxdq,
+        X86Pmovzxwd,
+        X86Pmulld,
+        X86Pmullw,
+        X86Popcnt,
+        X86Por,
+        X86Pshufb,
+        X86Pslld,
+        X86Pslldq,
+        X86Psllq,
+        X86Psllw,
+        X86Psrad,
+        X86Psraw,
+        X86Psrld,
+        X86Psrlq,
+        X86Psrldq,
+        X86Psrlw,
+        X86Psubb,
+        X86Psubd,
+        X86Psubq,
+        X86Psubw,
+        X86Punpckhbw,
+        X86Punpckhdq,
+        X86Punpckhqdq,
+        X86Punpckhwd,
+        X86Punpcklbw,
+        X86Punpckldq,
+        X86Punpcklqdq,
+        X86Punpcklwd,
+        X86Pxor,
+        X86Rcpps,
+        X86Rcpss,
+        X86Roundpd,
+        X86Roundps,
+        X86Roundsd,
+        X86Roundss,
+        X86Rsqrtps,
+        X86Rsqrtss,
+        X86Shufpd,
+        X86Shufps,
+        X86Sqrtpd,
+        X86Sqrtps,
+        X86Sqrtsd,
+        X86Sqrtss,
+        X86Subpd,
+        X86Subps,
+        X86Subsd,
+        X86Subss,
+        X86Unpckhpd,
+        X86Unpckhps,
+        X86Unpcklpd,
+        X86Unpcklps,
+        X86Xorpd,
+        X86Xorps
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/IntrinsicOperation.cs b/ARMeilleure/IntermediateRepresentation/IntrinsicOperation.cs
new file mode 100644
index 000000000..34781b700
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/IntrinsicOperation.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+    class IntrinsicOperation : Operation
+    {
+        // Intrinsic represented by this operation.
+        public Intrinsic Intrinsic { get; }
+
+        // The base operation always uses Instruction.Extended; the intrinsic is kept alongside it.
+        public IntrinsicOperation(Intrinsic intrin, Operand dest, params Operand[] sources)
+            : base(Instruction.Extended, dest, sources) => Intrinsic = intrin;
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/MemoryOperand.cs b/ARMeilleure/IntermediateRepresentation/MemoryOperand.cs
new file mode 100644
index 000000000..742842fa7
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/MemoryOperand.cs
@@ -0,0 +1,25 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+    class MemoryOperand : Operand // Operand of kind Memory made of base, index, scale and displacement parts.
+    {
+        public Operand BaseAddress { get; set; }
+        public Operand Index       { get; set; }
+        // NOTE(review): presumably the factor applied to Index when the address is formed — confirm.
+        public Multiplier Scale { get; }
+
+        public int Displacement { get; }
+        // Creates a memory operand; every argument after baseAddress is optional.
+        public MemoryOperand(
+            OperandType type,
+            Operand     baseAddress,
+            Operand     index        = null,
+            Multiplier  scale        = Multiplier.x1,
+            int         displacement = 0) : base(OperandKind.Memory, type)
+        {
+            BaseAddress  = baseAddress;
+            Index        = index;
+            Scale        = scale;
+            Displacement = displacement;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Multiplier.cs b/ARMeilleure/IntermediateRepresentation/Multiplier.cs
new file mode 100644
index 000000000..23582072b
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/Multiplier.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+    enum Multiplier // Type of MemoryOperand.Scale; each value is log2 of the factor in its name.
+    {
+        x1 = 0,
+        x2 = 1,
+        x4 = 2,
+        x8 = 3
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Node.cs b/ARMeilleure/IntermediateRepresentation/Node.cs
new file mode 100644
index 000000000..167acd072
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/Node.cs
@@ -0,0 +1,163 @@
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+    class Node // Base IR node holding destination and source operands and their def/use bookkeeping.
+    {
+        public Operand Destination // First destination (null when there are none); setting it replaces all destinations.
+        {
+            get
+            {
+                return _destinations.Length != 0 ? GetDestination(0) : null;
+            }
+            set
+            {
+                if (value != null)
+                {
+                    SetDestinations(new Operand[] { value });
+                }
+                else
+                {
+                    SetDestinations(new Operand[0]);
+                }
+            }
+        }
+        // Operand arrays, and for each slot the list node registered in a local's Assignments/Uses list.
+        private Operand[] _destinations;
+        private Operand[] _sources;
+
+        private LinkedListNode<Node>[] _asgUseNodes;
+        private LinkedListNode<Node>[] _srcUseNodes;
+
+        public int DestinationsCount => _destinations.Length;
+        public int SourcesCount      => _sources.Length;
+        // Creates a node with one destination (or none when null) and sourcesCount empty source slots.
+        public Node(Operand destination, int sourcesCount)
+        {
+            Destination = destination;
+
+            _sources = new Operand[sourcesCount];
+
+            _srcUseNodes = new LinkedListNode<Node>[sourcesCount];
+        }
+        // Creates a node with the given destinations; throws ArgumentNullException when the array is null.
+        public Node(Operand[] destinations, int sourcesCount)
+        {
+            SetDestinations(destinations ?? throw new ArgumentNullException(nameof(destinations)));
+
+            _sources = new Operand[sourcesCount];
+
+            _srcUseNodes = new LinkedListNode<Node>[sourcesCount];
+        }
+
+        public Operand GetDestination(int index)
+        {
+            return _destinations[index];
+        }
+
+        public Operand GetSource(int index)
+        {
+            return _sources[index];
+        }
+        // Replaces one destination, moving this node between the Assignments lists of local variables.
+        public void SetDestination(int index, Operand destination)
+        {
+            Operand oldOp = _destinations[index];
+
+            if (oldOp != null && oldOp.Kind == OperandKind.LocalVariable)
+            {
+                oldOp.Assignments.Remove(_asgUseNodes[index]);
+            }
+
+            if (destination != null && destination.Kind == OperandKind.LocalVariable)
+            {
+                _asgUseNodes[index] = destination.Assignments.AddLast(this);
+            }
+
+            _destinations[index] = destination;
+        }
+        // Replaces one source, moving this node between the Uses lists of local variables.
+        public void SetSource(int index, Operand source)
+        {
+            Operand oldOp = _sources[index];
+
+            if (oldOp != null && oldOp.Kind == OperandKind.LocalVariable)
+            {
+                oldOp.Uses.Remove(_srcUseNodes[index]);
+            }
+
+            if (source != null && source.Kind == OperandKind.LocalVariable)
+            {
+                _srcUseNodes[index] = source.Uses.AddLast(this);
+            }
+
+            _sources[index] = source;
+        }
+        // Replaces all destinations; the array is copied and null entries are allowed.
+        public void SetDestinations(Operand[] destinations)
+        {
+            // Unregister this node from the locals it currently assigns
+            // (_destinations is still null on the first call, made by the constructors).
+            if (_destinations != null)
+            {
+                for (int index = 0; index < _destinations.Length; index++)
+                {
+                    Operand oldOp = _destinations[index];
+
+                    if (oldOp != null && oldOp.Kind == OperandKind.LocalVariable)
+                    {
+                        oldOp.Assignments.Remove(_asgUseNodes[index]);
+                    }
+                }
+            }
+
+            // Always copy into a private array so that later SetDestination calls
+            // never write through to the array owned by the caller.
+            _destinations = new Operand[destinations.Length];
+            _asgUseNodes = new LinkedListNode<Node>[destinations.Length];
+
+            for (int index = 0; index < destinations.Length; index++)
+            {
+                Operand newOp = destinations[index];
+
+                _destinations[index] = newOp;
+
+                // Null entries are allowed, as in SetDestination.
+                if (newOp != null && newOp.Kind == OperandKind.LocalVariable)
+                {
+                    _asgUseNodes[index] = newOp.Assignments.AddLast(this);
+                }
+            }
+        }
+        // Replaces all sources; the array is copied and null entries are allowed.
+        public void SetSources(Operand[] sources)
+        {
+            for (int index = 0; index < _sources.Length; index++)
+            {
+                Operand oldOp = _sources[index];
+
+                if (oldOp != null && oldOp.Kind == OperandKind.LocalVariable)
+                {
+                    oldOp.Uses.Remove(_srcUseNodes[index]);
+                }
+            }
+
+            _sources = new Operand[sources.Length];
+
+            _srcUseNodes = new LinkedListNode<Node>[sources.Length];
+
+            for (int index = 0; index < sources.Length; index++)
+            {
+                Operand newOp = sources[index];
+
+                _sources[index] = newOp;
+                // Null entries are allowed, as in SetSource.
+                if (newOp != null && newOp.Kind == OperandKind.LocalVariable)
+                {
+                    _srcUseNodes[index] = newOp.Uses.AddLast(this);
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Operand.cs b/ARMeilleure/IntermediateRepresentation/Operand.cs
new file mode 100644
index 000000000..2df6256fc
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/Operand.cs
@@ -0,0 +1,124 @@
+using System;
+using System.Collections.Generic;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+    class Operand // IR operand: a kind, a type and a 64-bit payload, plus assignment/use lists.
+    {
+        public OperandKind Kind { get; }
+
+        public OperandType Type { get; }
+        // Raw payload: constant bits, packed register info, or the number stored by NumberLocal.
+        public ulong Value { get; private set; }
+        // Nodes having this operand as a destination / source; Node maintains them for local variables only.
+        public LinkedList<Node> Assignments { get; }
+        public LinkedList<Node> Uses        { get; }
+
+        private Operand()
+        {
+            Assignments = new LinkedList<Node>();
+            Uses        = new LinkedList<Node>();
+        }
+
+        public Operand(OperandKind kind, OperandType type = OperandType.None) : this()
+        {
+            Kind = kind;
+            Type = type;
+        }
+        // Constant constructors: the operand type is chosen from the argument's CLR type.
+        public Operand(int value) : this(OperandKind.Constant, OperandType.I32)
+        {
+            Value = (uint)value; // zero-extends the 32-bit pattern into Value
+        }
+
+        public Operand(uint value) : this(OperandKind.Constant, OperandType.I32)
+        {
+            Value = (uint)value;
+        }
+
+        public Operand(long value) : this(OperandKind.Constant, OperandType.I64)
+        {
+            Value = (ulong)value;
+        }
+
+        public Operand(ulong value) : this(OperandKind.Constant, OperandType.I64)
+        {
+            Value = value;
+        }
+        // Floating-point constants are stored as their raw bit patterns.
+        public Operand(float value) : this(OperandKind.Constant, OperandType.FP32)
+        {
+            Value = (ulong)BitConverter.SingleToInt32Bits(value); // NOTE(review): int -> ulong sign-extends (assuming an unchecked context), so negative floats set the upper 32 bits, unlike the int constructor.
+        }
+
+        public Operand(double value) : this(OperandKind.Constant, OperandType.FP64)
+        {
+            Value = (ulong)BitConverter.DoubleToInt64Bits(value);
+        }
+        // Register operand: regType is stored from bit 24 of Value upwards, OR-ed with index.
+        public Operand(int index, RegisterType regType, OperandType type) : this()
+        {
+            Kind = OperandKind.Register;
+            Type = type;
+
+            Value = (ulong)((int)regType << 24 | index);
+        }
+        // Unpacks Value into a Register: low 24 bits are the index, the remaining bits the register type.
+        public Register GetRegister()
+        {
+            return new Register((int)Value & 0xffffff, (RegisterType)(Value >> 24));
+        }
+        // The As* accessors truncate or bit-cast Value; none of them checks Kind or Type.
+        public byte AsByte()
+        {
+            return (byte)Value;
+        }
+
+        public short AsInt16()
+        {
+            return (short)Value;
+        }
+
+        public int AsInt32()
+        {
+            return (int)Value;
+        }
+
+        public long AsInt64()
+        {
+            return (long)Value;
+        }
+
+        public float AsFloat()
+        {
+            return BitConverter.Int32BitsToSingle((int)Value);
+        }
+
+        public double AsDouble()
+        {
+            return BitConverter.Int64BitsToDouble((long)Value);
+        }
+        // Stores number in Value; throws InvalidOperationException unless this is a local variable.
+        internal void NumberLocal(int number)
+        {
+            if (Kind != OperandKind.LocalVariable)
+            {
+                throw new InvalidOperationException("The operand is not a local variable.");
+            }
+
+            Value = (ulong)number;
+        }
+        // Local variables use the default object hash; other kinds hash Value, Kind and Type.
+        public override int GetHashCode()
+        {
+            if (Kind == OperandKind.LocalVariable)
+            {
+                return base.GetHashCode();
+            }
+            else
+            {
+                return (int)Value ^ ((int)Kind << 16) ^ ((int)Type << 20);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/OperandHelper.cs b/ARMeilleure/IntermediateRepresentation/OperandHelper.cs
new file mode 100644
index 000000000..4a930e03f
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/OperandHelper.cs
@@ -0,0 +1,68 @@
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+    static class OperandHelper
+    {
+        // Factory methods for Operand instances. Each method forwards to the
+        // Operand constructor that matches its arguments, so the resulting kind
+        // and type are the ones that constructor assigns.
+
+        // Integer constant of the requested type: the value is truncated to int
+        // when type is I32, and kept as a 64-bit constant for any other type.
+        public static Operand Const(OperandType type, long value) =>
+            type == OperandType.I32 ? new Operand((int)value) : new Operand(value);
+
+        // I32 constant holding 1 for true and 0 for false.
+        public static Operand Const(bool value) =>
+            new Operand(value ? 1 : 0);
+
+        // I32 constant from a signed 32-bit value.
+        // The bits are stored zero-extended in Operand.Value.
+        public static Operand Const(int value) =>
+            new Operand(value);
+
+        // I32 constant from an unsigned 32-bit value.
+        // Same operand type as the int overload.
+        public static Operand Const(uint value) =>
+            new Operand(value);
+
+        // I64 constant from a signed 64-bit value.
+        public static Operand Const(long value) =>
+            new Operand(value);
+
+        // I64 constant from an unsigned 64-bit value.
+        public static Operand Const(ulong value) =>
+            new Operand(value);
+
+        // FP32 constant.
+        // The value is stored as its raw bit pattern.
+        public static Operand ConstF(float value) =>
+            new Operand(value);
+
+        // FP64 constant.
+        // The value is stored as its raw bit pattern.
+        public static Operand ConstF(double value) =>
+            new Operand(value);
+
+        // New operand of kind Label, with type None.
+        // Every call returns a distinct instance.
+        public static Operand Label() =>
+            new Operand(OperandKind.Label);
+
+        // New local variable operand of the given type.
+        // Every call returns a distinct instance.
+        public static Operand Local(OperandType type) =>
+            new Operand(OperandKind.LocalVariable, type);
+
+        // Operand referring to the register with the given index and register type.
+        // Both are packed into Operand.Value.
+        public static Operand Register(int index, RegisterType regType, OperandType type) =>
+            new Operand(index, regType, type);
+
+        // New operand of kind Undefined, with type None.
+        public static Operand Undef() =>
+            new Operand(OperandKind.Undefined);
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/OperandKind.cs b/ARMeilleure/IntermediateRepresentation/OperandKind.cs
new file mode 100644
index 000000000..576183534
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/OperandKind.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+    enum OperandKind // Value of Operand.Kind.
+    {
+        Constant,      // set by the Operand constructors that take a numeric value
+        Label,
+        LocalVariable, // the only kind Node tracks in Assignments/Uses
+        Memory,        // set by MemoryOperand
+        Register,      // set by the Operand(index, regType, type) constructor
+        Undefined
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/OperandType.cs b/ARMeilleure/IntermediateRepresentation/OperandType.cs
new file mode 100644
index 000000000..bfdf5130c
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/OperandType.cs
@@ -0,0 +1,51 @@
+using System;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+    enum OperandType // Value type of an operand; see OperandTypeExtensions for sizes and register classes.
+    {
+        None, // default for operands created without a type; rejected by ToRegisterType and GetSizeInBytes
+        I32,
+        I64,
+        FP32,
+        FP64,
+        V128
+    }
+
+    static class OperandTypeExtensions
+    {
+        // True for the scalar integer types I32 and I64 only.
+        public static bool IsInteger(this OperandType type)
+        {
+            return type == OperandType.I32 || type == OperandType.I64;
+        }
+
+        public static RegisterType ToRegisterType(this OperandType type)
+        {
+            // Integer types map to integer registers; FP32, FP64 and V128 map to vector registers.
+            switch (type)
+            {
+                case OperandType.I32:
+                case OperandType.I64:  return RegisterType.Integer;
+                case OperandType.FP32:
+                case OperandType.FP64:
+                case OperandType.V128: return RegisterType.Vector;
+                default: throw new InvalidOperationException($"Invalid operand type \"{type}\".");
+            }
+        }
+
+        public static int GetSizeInBytes(this OperandType type)
+        {
+            // I32/FP32 take 4 bytes, I64/FP64 take 8 and V128 takes 16.
+            switch (type)
+            {
+                case OperandType.I32:
+                case OperandType.FP32: return 4;
+                case OperandType.I64:
+                case OperandType.FP64: return 8;
+                case OperandType.V128: return 16;
+                default: throw new InvalidOperationException($"Invalid operand type \"{type}\".");
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Operation.cs b/ARMeilleure/IntermediateRepresentation/Operation.cs
new file mode 100644
index 000000000..620bf3f6e
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/Operation.cs
@@ -0,0 +1,40 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+    class Operation : Node
+    {
+        public Instruction Instruction { get; private set; }
+        // Single-destination form: destination may be null and sources may be empty.
+        public Operation(
+            Instruction instruction,
+            Operand destination,
+            params Operand[] sources) : base(destination, sources.Length)
+        {
+            Instruction = instruction;
+            int slot = 0;
+            foreach (Operand source in sources)
+            {
+                SetSource(slot++, source);
+            }
+        }
+        // Multi-destination form.
+        public Operation(
+            Instruction instruction,
+            Operand[] destinations,
+            Operand[] sources) : base(destinations, sources.Length)
+        {
+            Instruction = instruction;
+            int slot = 0;
+            foreach (Operand source in sources)
+            {
+                SetSource(slot++, source);
+            }
+        }
+        // Rewrites this operation in place as a Copy whose only source is the given operand.
+        public void TurnIntoCopy(Operand source)
+        {
+            Operand[] copySources = { source };
+            Instruction = Instruction.Copy;
+            SetSources(copySources);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/PhiNode.cs b/ARMeilleure/IntermediateRepresentation/PhiNode.cs
new file mode 100644
index 000000000..30fc4d384
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/PhiNode.cs
@@ -0,0 +1,22 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+    class PhiNode : Node
+    {
+        // Block slots, allocated with the same count as the node's sources.
+        // NOTE(review): presumably slot i is the predecessor block that source i comes from — confirm.
+        private BasicBlock[] _blocks;
+
+        // Creates a phi node with predecessorsCount sources and as many empty block slots.
+        public PhiNode(Operand destination, int predecessorsCount)
+            : base(destination, predecessorsCount)
+        {
+            _blocks = new BasicBlock[predecessorsCount];
+        }
+
+        // Returns the block stored in the given slot (null until SetBlock is called for it).
+        public BasicBlock GetBlock(int index) => _blocks[index];
+
+        // Stores a block in the given slot.
+        public void SetBlock(int index, BasicBlock block) => _blocks[index] = block;
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/Register.cs b/ARMeilleure/IntermediateRepresentation/Register.cs
new file mode 100644
index 000000000..745b31538
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/Register.cs
@@ -0,0 +1,43 @@
+using System;
+
+namespace ARMeilleure.IntermediateRepresentation
+{
+    struct Register : IEquatable<Register>
+    {
+        // Index of the register.
+        public int Index { get; }
+
+        // Kind of register the index refers to.
+        public RegisterType Type { get; }
+
+        public Register(int index, RegisterType type)
+        {
+            Type  = type;
+            Index = index;
+        }
+
+        // Packs the low 16 bits of Index with Type placed above them.
+        public override int GetHashCode()
+        {
+            int typeBits = (int)Type << 16;
+
+            return typeBits | (ushort)Index;
+        }
+
+        // Both operators defer to the typed Equals.
+        public static bool operator ==(Register x, Register y) => x.Equals(y);
+
+        public static bool operator !=(Register x, Register y) => !x.Equals(y);
+
+        public override bool Equals(object obj)
+        {
+            return obj is Register other && Equals(other);
+        }
+
+        // Two registers are equal when both Index and Type match.
+        public bool Equals(Register other)
+        {
+            return Index == other.Index && Type == other.Type;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/IntermediateRepresentation/RegisterType.cs b/ARMeilleure/IntermediateRepresentation/RegisterType.cs
new file mode 100644
index 000000000..e71795cb9
--- /dev/null
+++ b/ARMeilleure/IntermediateRepresentation/RegisterType.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.IntermediateRepresentation
+{
+    enum RegisterType // Type of Register.Type; Operand packs it above bit 24 of its Value.
+    {
+        Integer, // returned by ToRegisterType for I32 and I64
+        Vector,  // returned by ToRegisterType for FP32, FP64 and V128
+        Flag
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/IMemory.cs b/ARMeilleure/Memory/IMemory.cs
new file mode 100644
index 000000000..0c3849c07
--- /dev/null
+++ b/ARMeilleure/Memory/IMemory.cs
@@ -0,0 +1,37 @@
+namespace ARMeilleure.Memory
+{
+    public interface IMemory // Typed scalar reads and writes, each addressed by a 64-bit position.
+    {
+        sbyte ReadSByte(long position);
+
+        short ReadInt16(long position);
+
+        int ReadInt32(long position);
+
+        long ReadInt64(long position);
+
+        byte ReadByte(long position);
+
+        ushort ReadUInt16(long position);
+
+        uint ReadUInt32(long position);
+
+        ulong ReadUInt64(long position);
+        // Write counterparts of the readers above, one per scalar type.
+        void WriteSByte(long position, sbyte value);
+
+        void WriteInt16(long position, short value);
+
+        void WriteInt32(long position, int value);
+
+        void WriteInt64(long position, long value);
+
+        void WriteByte(long position, byte value);
+
+        void WriteUInt16(long position, ushort value);
+
+        void WriteUInt32(long position, uint value);
+
+        void WriteUInt64(long position, ulong value);
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/IMemoryManager.cs b/ARMeilleure/Memory/IMemoryManager.cs
new file mode 100644
index 000000000..bcee5db23
--- /dev/null
+++ b/ARMeilleure/Memory/IMemoryManager.cs
@@ -0,0 +1,40 @@
+using ARMeilleure.State;
+using System;
+
+namespace ARMeilleure.Memory
+{
+    public interface IMemoryManager : IMemory, IDisposable // Adds mapping, address queries, 32-bit atomics and bulk transfers to IMemory.
+    {
+        void Map(long va, long pa, long size);
+
+        void Unmap(long position, long size);
+
+        bool IsMapped(long position);
+
+        long GetPhysicalAddress(long virtualAddress);
+
+        bool IsRegionModified(long position, long size);
+
+        bool TryGetHostAddress(long position, long size, out IntPtr ptr);
+
+        bool IsValidPosition(long position);
+        // NOTE(review): the bool presumably reports whether the exchange took place — confirm against the implementation.
+        bool AtomicCompareExchangeInt32(long position, int expected, int desired);
+
+        int AtomicIncrementInt32(long position);
+
+        int AtomicDecrementInt32(long position);
+        // Bulk transfers; MemoryHelper.Read/Write are built on ReadBytes(position, size) and WriteBytes(position, data).
+        byte[] ReadBytes(long position, long size);
+
+        void ReadBytes(long position, byte[] data, int startIndex, int size);
+
+        void WriteVector128(long position, V128 value);
+
+        void WriteBytes(long position, byte[] data);
+
+        void WriteBytes(long position, byte[] data, int startIndex, int size);
+
+        void CopyBytes(long src, long dst, long size);
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryHelper.cs b/ARMeilleure/Memory/MemoryHelper.cs
new file mode 100644
index 000000000..71ddac238
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryHelper.cs
@@ -0,0 +1,71 @@
+using System;
+using System.IO;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace ARMeilleure.Memory
+{
+    public static class MemoryHelper // Convenience routines built on top of IMemoryManager.
+    {
+        public static void FillWithZeros(IMemoryManager memory, long position, int size) // Zeroes size bytes starting at position.
+        {
+            int size8 = size & ~(8 - 1); // size rounded down to a multiple of 8
+            // Clear the bulk of the range 8 bytes at a time.
+            for (int offs = 0; offs < size8; offs += 8)
+            {
+                memory.WriteInt64(position + offs, 0);
+            }
+            // Clear the remaining (size % 8) tail bytes; offs is an absolute offset, so it must run up to size.
+            for (int offs = size8; offs < size; offs++)
+            {
+                memory.WriteByte(position + offs, 0);
+            }
+        }
+        // Reads Marshal.SizeOf<T>() bytes at position and marshals them into a T.
+        public unsafe static T Read<T>(IMemoryManager memory, long position) where T : struct
+        {
+            long size = Marshal.SizeOf<T>();
+
+            byte[] data = memory.ReadBytes(position, size);
+
+            fixed (byte* ptr = data)
+            {
+                return Marshal.PtrToStructure<T>((IntPtr)ptr);
+            }
+        }
+        // Marshals value into a byte array of Marshal.SizeOf<T>() bytes and writes it at position.
+        public unsafe static void Write<T>(IMemoryManager memory, long position, T value) where T : struct
+        {
+            long size = Marshal.SizeOf<T>();
+
+            byte[] data = new byte[size];
+
+            fixed (byte* ptr = data)
+            {
+                Marshal.StructureToPtr<T>(value, (IntPtr)ptr, false);
+            }
+
+            memory.WriteBytes(position, data);
+        }
+        // Reads bytes until a zero byte or until maxSize bytes were read (-1 means no limit) and decodes them as ASCII.
+        public static string ReadAsciiString(IMemoryManager memory, long position, long maxSize = -1)
+        {
+            using (MemoryStream ms = new MemoryStream())
+            {
+                for (long offs = 0; offs < maxSize || maxSize == -1; offs++)
+                {
+                    byte value = (byte)memory.ReadByte(position + offs);
+
+                    if (value == 0)
+                    {
+                        break;
+                    }
+
+                    ms.WriteByte(value);
+                }
+
+                return Encoding.ASCII.GetString(ms.ToArray());
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryManagement.cs b/ARMeilleure/Memory/MemoryManagement.cs
new file mode 100644
index 000000000..bf0bd02ce
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryManagement.cs
@@ -0,0 +1,114 @@
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Memory
+{
+    /// <summary>
+    /// Dispatches host memory allocation, protection and write tracking requests
+    /// to the Windows or Unix implementation, depending on the running OS.
+    /// </summary>
+    public static class MemoryManagement
+    {
+        private static bool IsWindows => RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
+
+        private static bool IsUnix =>
+            RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ||
+            RuntimeInformation.IsOSPlatform(OSPlatform.OSX);
+
+        /// <summary>True when running on Windows, the only platform with a write watch backend here.</summary>
+        public static bool HasWriteWatchSupport => IsWindows;
+
+        /// <summary>Allocates <paramref name="size"/> bytes of host memory.</summary>
+        /// <exception cref="PlatformNotSupportedException">The OS is not Windows, Linux or macOS.</exception>
+        public static IntPtr Allocate(ulong size)
+        {
+            if (IsWindows)
+            {
+                return MemoryManagementWindows.Allocate(new IntPtr((long)size));
+            }
+
+            if (IsUnix)
+            {
+                return MemoryManagementUnix.Allocate(size);
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>
+        /// Allocates <paramref name="size"/> bytes of host memory, with write tracking on Windows.
+        /// On Linux and macOS this is a regular allocation.
+        /// </summary>
+        /// <exception cref="PlatformNotSupportedException">The OS is not Windows, Linux or macOS.</exception>
+        public static IntPtr AllocateWriteTracked(ulong size)
+        {
+            if (IsWindows)
+            {
+                return MemoryManagementWindows.AllocateWriteTracked(new IntPtr((long)size));
+            }
+
+            if (IsUnix)
+            {
+                return MemoryManagementUnix.Allocate(size);
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>Changes the protection of a memory range.</summary>
+        /// <exception cref="MemoryProtectionException">The OS call reported failure.</exception>
+        /// <exception cref="PlatformNotSupportedException">The OS is not Windows, Linux or macOS.</exception>
+        public static void Reprotect(IntPtr address, ulong size, MemoryProtection permission)
+        {
+            bool succeeded;
+
+            if (IsWindows)
+            {
+                succeeded = MemoryManagementWindows.Reprotect(address, new IntPtr((long)size), permission);
+            }
+            else if (IsUnix)
+            {
+                succeeded = MemoryManagementUnix.Reprotect(address, size, permission);
+            }
+            else
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            if (!succeeded)
+            {
+                throw new MemoryProtectionException(permission);
+            }
+        }
+
+        /// <summary>Releases memory previously returned by one of the allocation methods.</summary>
+        /// <returns>True when the OS call reported success.</returns>
+        /// <exception cref="PlatformNotSupportedException">The OS is not Windows, Linux or macOS.</exception>
+        public static bool Free(IntPtr address)
+        {
+            if (IsWindows)
+            {
+                return MemoryManagementWindows.Free(address);
+            }
+
+            if (IsUnix)
+            {
+                return MemoryManagementUnix.Free(address);
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>
+        /// Queries the pages written to inside a write tracked range.
+        /// </summary>
+        /// <returns>False when the query failed or the platform has no write tracking.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool GetModifiedPages(
+            IntPtr    address,
+            IntPtr    size,
+            IntPtr[]  addresses,
+            out ulong count)
+        {
+            // This is only supported on windows, but returning
+            // false (failed) is also valid for platforms without
+            // write tracking support on the OS.
+            if (!IsWindows)
+            {
+                count = 0;
+
+                return false;
+            }
+
+            return MemoryManagementWindows.GetModifiedPages(address, size, addresses, out count);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryManagementUnix.cs b/ARMeilleure/Memory/MemoryManagementUnix.cs
new file mode 100644
index 000000000..3331fb428
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryManagementUnix.cs
@@ -0,0 +1,71 @@
+using Mono.Unix.Native;
+using System;
+
+namespace ARMeilleure.Memory
+{
+    /// <summary>
+    /// Host memory management for Linux and macOS, built on mmap/mprotect/munmap.
+    /// Each allocation is preceded by one extra page whose last 8 bytes store the
+    /// requested size, so that <see cref="Free"/> can unmap the whole mapping.
+    /// </summary>
+    static class MemoryManagementUnix
+    {
+        // mmap reports failure by returning MAP_FAILED, which is (void*)-1, not a null pointer.
+        private static readonly IntPtr MapFailed = new IntPtr(-1L);
+
+        /// <summary>Maps <paramref name="size"/> bytes of anonymous, private, read/write memory.</summary>
+        /// <returns>Pointer to the usable region (one page after the start of the mapping).</returns>
+        /// <exception cref="OutOfMemoryException">The mapping could not be created.</exception>
+        public static IntPtr Allocate(ulong size)
+        {
+            ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);
+
+            const MmapProts prot = MmapProts.PROT_READ | MmapProts.PROT_WRITE;
+
+            const MmapFlags flags = MmapFlags.MAP_PRIVATE | MmapFlags.MAP_ANONYMOUS;
+
+            IntPtr ptr = Syscall.mmap(IntPtr.Zero, size + pageSize, prot, flags, -1, 0);
+
+            // The null check is kept for safety, but MAP_FAILED is the value mmap actually returns on error.
+            if (ptr == MapFailed || ptr == IntPtr.Zero)
+            {
+                throw new OutOfMemoryException();
+            }
+
+            unsafe
+            {
+                // Skip the header page and store the size right before the returned pointer.
+                ptr = new IntPtr(ptr.ToInt64() + (long)pageSize);
+
+                *((ulong*)ptr - 1) = size;
+            }
+
+            return ptr;
+        }
+
+        /// <summary>Changes the protection of a range with mprotect.</summary>
+        /// <returns>True when mprotect returned 0.</returns>
+        public static bool Reprotect(IntPtr address, ulong size, Memory.MemoryProtection protection)
+        {
+            MmapProts prot = GetProtection(protection);
+
+            return Syscall.mprotect(address, size, prot) == 0;
+        }
+
+        /// <summary>Converts the platform-independent protection value to mmap protection flags.</summary>
+        /// <exception cref="ArgumentException">The protection value is not one of the known ones.</exception>
+        private static MmapProts GetProtection(Memory.MemoryProtection protection)
+        {
+            switch (protection)
+            {
+                case Memory.MemoryProtection.None:             return MmapProts.PROT_NONE;
+                case Memory.MemoryProtection.Read:             return MmapProts.PROT_READ;
+                case Memory.MemoryProtection.ReadAndWrite:     return MmapProts.PROT_READ | MmapProts.PROT_WRITE;
+                case Memory.MemoryProtection.ReadAndExecute:   return MmapProts.PROT_READ | MmapProts.PROT_EXEC;
+                case Memory.MemoryProtection.ReadWriteExecute: return MmapProts.PROT_READ | MmapProts.PROT_WRITE | MmapProts.PROT_EXEC;
+                case Memory.MemoryProtection.Execute:          return MmapProts.PROT_EXEC;
+
+                default: throw new ArgumentException($"Invalid permission \"{protection}\".");
+            }
+        }
+
+        /// <summary>Unmaps a region returned by <see cref="Allocate"/>, including its header page.</summary>
+        /// <returns>True when munmap returned 0.</returns>
+        public static bool Free(IntPtr address)
+        {
+            ulong pageSize = (ulong)Syscall.sysconf(SysconfName._SC_PAGESIZE);
+
+            ulong size;
+
+            unsafe
+            {
+                // The size was stored right before the returned pointer by Allocate.
+                size = *((ulong*)address - 1);
+
+                address = new IntPtr(address.ToInt64() - (long)pageSize);
+            }
+
+            return Syscall.munmap(address, size + pageSize) == 0;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryManagementWindows.cs b/ARMeilleure/Memory/MemoryManagementWindows.cs
new file mode 100644
index 000000000..c1a84c95b
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryManagementWindows.cs
@@ -0,0 +1,156 @@
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Memory
+{
+    /// <summary>
+    /// Host memory management for Windows, built on the kernel32 virtual memory functions.
+    /// </summary>
+    static class MemoryManagementWindows
+    {
+        [Flags]
+        private enum AllocationType : uint
+        {
+            Commit     = 0x1000,
+            Reserve    = 0x2000,
+            Decommit   = 0x4000,
+            Release    = 0x8000,
+            Reset      = 0x80000,
+            Physical   = 0x400000,
+            TopDown    = 0x100000,
+            WriteWatch = 0x200000,
+            LargePages = 0x20000000
+        }
+
+        [Flags]
+        private enum MemoryProtection : uint
+        {
+            NoAccess                 = 0x01,
+            ReadOnly                 = 0x02,
+            ReadWrite                = 0x04,
+            WriteCopy                = 0x08,
+            Execute                  = 0x10,
+            ExecuteRead              = 0x20,
+            ExecuteReadWrite         = 0x40,
+            ExecuteWriteCopy         = 0x80,
+            GuardModifierflag        = 0x100,
+            NoCacheModifierflag      = 0x200,
+            WriteCombineModifierflag = 0x400
+        }
+
+        private enum WriteWatchFlags : uint
+        {
+            None  = 0,
+            Reset = 1
+        }
+
+        [DllImport("kernel32.dll")]
+        private static extern IntPtr VirtualAlloc(
+            IntPtr           lpAddress,
+            IntPtr           dwSize,
+            AllocationType   flAllocationType,
+            MemoryProtection flProtect);
+
+        [DllImport("kernel32.dll")]
+        private static extern bool VirtualProtect(
+            IntPtr               lpAddress,
+            IntPtr               dwSize,
+            MemoryProtection     flNewProtect,
+            out MemoryProtection lpflOldProtect);
+
+        [DllImport("kernel32.dll")]
+        private static extern bool VirtualFree(
+            IntPtr         lpAddress,
+            IntPtr         dwSize,
+            AllocationType dwFreeType);
+
+        [DllImport("kernel32.dll")]
+        private static extern int GetWriteWatch(
+            WriteWatchFlags dwFlags,
+            IntPtr          lpBaseAddress,
+            IntPtr          dwRegionSize,
+            IntPtr[]        lpAddresses,
+            ref ulong       lpdwCount,
+            out uint        lpdwGranularity);
+
+        /// <summary>Reserves and commits <paramref name="size"/> bytes of read/write memory.</summary>
+        /// <exception cref="OutOfMemoryException">VirtualAlloc returned a null pointer.</exception>
+        public static IntPtr Allocate(IntPtr size)
+        {
+            return AllocateInternal(size, AllocationType.Reserve | AllocationType.Commit);
+        }
+
+        /// <summary>
+        /// Reserves and commits <paramref name="size"/> bytes of read/write memory
+        /// with the write watch allocation flag set.
+        /// </summary>
+        /// <exception cref="OutOfMemoryException">VirtualAlloc returned a null pointer.</exception>
+        public static IntPtr AllocateWriteTracked(IntPtr size)
+        {
+            return AllocateInternal(
+                size,
+                AllocationType.Reserve |
+                AllocationType.Commit  |
+                AllocationType.WriteWatch);
+        }
+
+        // Shared VirtualAlloc call used by both allocation entry points.
+        private static IntPtr AllocateInternal(IntPtr size, AllocationType flags)
+        {
+            IntPtr allocated = VirtualAlloc(IntPtr.Zero, size, flags, MemoryProtection.ReadWrite);
+
+            if (allocated == IntPtr.Zero)
+            {
+                throw new OutOfMemoryException();
+            }
+
+            return allocated;
+        }
+
+        /// <summary>Changes the protection of a range with VirtualProtect.</summary>
+        /// <returns>The value returned by VirtualProtect.</returns>
+        public static bool Reprotect(IntPtr address, IntPtr size, Memory.MemoryProtection protection)
+        {
+            return VirtualProtect(address, size, GetProtection(protection), out _);
+        }
+
+        /// <summary>Converts the platform-independent protection value to the Windows page protection constant.</summary>
+        /// <exception cref="ArgumentException">The protection value is not one of the known ones.</exception>
+        private static MemoryProtection GetProtection(Memory.MemoryProtection protection)
+        {
+            MemoryProtection converted;
+
+            switch (protection)
+            {
+                case Memory.MemoryProtection.None:
+                    converted = MemoryProtection.NoAccess;
+                    break;
+
+                case Memory.MemoryProtection.Read:
+                    converted = MemoryProtection.ReadOnly;
+                    break;
+
+                case Memory.MemoryProtection.ReadAndWrite:
+                    converted = MemoryProtection.ReadWrite;
+                    break;
+
+                case Memory.MemoryProtection.ReadAndExecute:
+                    converted = MemoryProtection.ExecuteRead;
+                    break;
+
+                case Memory.MemoryProtection.ReadWriteExecute:
+                    converted = MemoryProtection.ExecuteReadWrite;
+                    break;
+
+                case Memory.MemoryProtection.Execute:
+                    converted = MemoryProtection.Execute;
+                    break;
+
+                default:
+                    throw new ArgumentException($"Invalid permission \"{protection}\".");
+            }
+
+            return converted;
+        }
+
+        /// <summary>Releases a region with VirtualFree.</summary>
+        /// <returns>The value returned by VirtualFree.</returns>
+        public static bool Free(IntPtr address)
+        {
+            return VirtualFree(address, IntPtr.Zero, AllocationType.Release);
+        }
+
+        /// <summary>
+        /// Calls GetWriteWatch with the reset flag for the given range.
+        /// </summary>
+        /// <param name="address">Base address of the range to query.</param>
+        /// <param name="size">Size of the range to query, in bytes.</param>
+        /// <param name="addresses">Buffer that receives page addresses; its length is passed as the capacity.</param>
+        /// <param name="count">Receives the count value written back by GetWriteWatch.</param>
+        /// <returns>True when GetWriteWatch returned 0.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool GetModifiedPages(
+            IntPtr    address,
+            IntPtr    size,
+            IntPtr[]  addresses,
+            out ulong count)
+        {
+            ulong capacity = (ulong)addresses.Length;
+
+            int status = GetWriteWatch(
+                WriteWatchFlags.Reset,
+                address,
+                size,
+                addresses,
+                ref capacity,
+                out _);
+
+            count = capacity;
+
+            return status == 0;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryManager.cs b/ARMeilleure/Memory/MemoryManager.cs
new file mode 100644
index 000000000..12c118437
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryManager.cs
@@ -0,0 +1,835 @@
+using ARMeilleure.State;
+using System;
+using System.Runtime.InteropServices;
+using System.Threading;
+
+using static ARMeilleure.Memory.MemoryManagement;
+
+namespace ARMeilleure.Memory
+{
+    public unsafe class MemoryManager : IMemoryManager
+    {
+        // Page geometry: 4 KiB pages (12 offset bits); PageMask selects the offset inside a page.
+        public const int PageBits = 12;
+        public const int PageSize = 1 << PageBits;
+        public const int PageMask = PageSize - 1;
+
+        // Flag bit stored in a page table entry; cleared by TranslateWrite and
+        // set again by IsRegionModifiedFallback once the page has been reported.
+        private const long PteFlagNotModified = 1;
+
+        // Low bits of a page table entry that hold flags; masked off before the
+        // entry is used as a host pointer.
+        internal const long PteFlagsMask = 7;
+
+        // Host pointer passed to the constructor; Map offsets are relative to it.
+        public IntPtr Ram { get; private set; }
+
+        // Same address as Ram, kept as a raw pointer for pointer arithmetic.
+        private byte* _ramPtr;
+
+        // Root level of the page table, allocated by the constructor.
+        private IntPtr _pageTable;
+
+        internal IntPtr PageTable => _pageTable;
+
+        // Number of address bits consumed by one page table level, the number of
+        // entries in one level, and the mask used to extract a level index.
+        internal int PtLevelBits { get; }
+        internal int PtLevelSize { get; }
+        internal int PtLevelMask { get; }
+
+        public bool HasWriteWatchSupport => MemoryManagement.HasWriteWatchSupport;
+
+        // Width of the address space in bits, and its size in bytes (1 << bits).
+        public int  AddressSpaceBits { get; }
+        public long AddressSpaceSize { get; }
+
+        /// <summary>
+        /// Creates a memory manager that maps an address space onto the host memory at <paramref name="ram"/>.
+        /// </summary>
+        /// <param name="ram">Host pointer used as the base for physical addresses.</param>
+        /// <param name="addressSpaceBits">Width of the address space, in bits.</param>
+        /// <param name="useFlatPageTable">True to use a single level page table covering the whole address space.</param>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The resulting page table level would need more entries than a 32-bit count can represent.
+        /// </exception>
+        public MemoryManager(
+            IntPtr ram,
+            int    addressSpaceBits = 48,
+            bool   useFlatPageTable = false)
+        {
+            Ram = ram;
+
+            _ramPtr = (byte*)ram;
+
+            AddressSpaceBits = addressSpaceBits;
+            AddressSpaceSize = 1L << addressSpaceBits;
+
+            // When flat page table is requested, we use a single
+            // array for the mappings of the entire address space.
+            // This has better performance, but also high memory usage.
+            // The multi level page table uses 9 bits per level, so
+            // the memory usage is lower, but the performance is also
+            // lower, since each address translation requires multiple reads.
+            if (useFlatPageTable)
+            {
+                PtLevelBits = addressSpaceBits - PageBits;
+            }
+            else
+            {
+                PtLevelBits = 9;
+            }
+
+            // PtLevelSize and PtLevelMask are 32-bit values, and a shift count outside 0..30
+            // would silently wrap and produce a table far smaller than the address space.
+            if (PtLevelBits < 0 || PtLevelBits > 30)
+            {
+                throw new ArgumentOutOfRangeException(nameof(addressSpaceBits));
+            }
+
+            PtLevelSize = 1 << PtLevelBits;
+            PtLevelMask = PtLevelSize - 1;
+
+            // Compute the byte size with 64-bit arithmetic, the 32-bit product
+            // overflows once a level has 2^28 or more pointer sized entries.
+            _pageTable = Allocate((ulong)PtLevelSize * (ulong)IntPtr.Size);
+        }
+
+        /// <summary>
+        /// Maps <paramref name="size"/> bytes at virtual address <paramref name="va"/> to the
+        /// host memory located <paramref name="pa"/> bytes after the RAM base pointer.
+        /// </summary>
+        public void Map(long va, long pa, long size)
+        {
+            byte* hostBase = _ramPtr + pa;
+
+            SetPtEntries(va, hostBase, size);
+        }
+
+        /// <summary>
+        /// Removes the mappings of <paramref name="size"/> bytes starting at
+        /// <paramref name="position"/> by storing null page table entries.
+        /// </summary>
+        public void Unmap(long position, long size)
+        {
+            byte* unmapped = null;
+
+            SetPtEntries(position, unmapped, size);
+        }
+
+        /// <summary>
+        /// Checks whether the page containing <paramref name="position"/> is mapped.
+        /// </summary>
+        /// <param name="position">Virtual address to check.</param>
+        /// <returns>True when the address is valid and its page table entry holds a non-null pointer.</returns>
+        public bool IsMapped(long position)
+        {
+            // Translate adds the offset inside the page to the entry pointer, so for an
+            // unmapped page it only returns zero when the address is page aligned.
+            // Align the address first, otherwise unmapped addresses look mapped.
+            long pageStart = position & ~(long)PageMask;
+
+            return Translate(pageStart) != IntPtr.Zero;
+        }
+
+        /// <summary>
+        /// Translates a virtual address and returns the distance, in bytes,
+        /// between the resulting host pointer and the RAM base pointer.
+        /// </summary>
+        public long GetPhysicalAddress(long virtualAddress)
+        {
+            byte* hostPtr = (byte*)Translate(virtualAddress);
+
+            long offsetFromRam = (long)(hostPtr - _ramPtr);
+
+            return offsetFromRam;
+        }
+
+        /// <summary>
+        /// Translates a virtual address into a host pointer, without touching the entry flags.
+        /// </summary>
+        /// <returns>
+        /// Zero for an address outside the address space, otherwise the page table
+        /// entry pointer (flags removed) plus the offset inside the page.
+        /// </returns>
+        private IntPtr Translate(long position)
+        {
+            if (!IsValidPosition(position))
+            {
+                return IntPtr.Zero;
+            }
+
+            ulong entry = (ulong)GetPtEntry(position);
+
+            // Clearing the flag bits is a no-op when none of them is set.
+            byte* pagePtr = (byte*)(entry & ~(ulong)PteFlagsMask);
+
+            long pageOffset = position & PageMask;
+
+            return new IntPtr(pagePtr + pageOffset);
+        }
+
+        /// <summary>
+        /// Translates a virtual address into a host pointer for a write access,
+        /// clearing the not-modified flag of the page table entry when it is set.
+        /// </summary>
+        /// <returns>
+        /// Zero for an address outside the address space, otherwise the page table
+        /// entry pointer (flags removed) plus the offset inside the page.
+        /// </returns>
+        private IntPtr TranslateWrite(long position)
+        {
+            if (!IsValidPosition(position))
+            {
+                return IntPtr.Zero;
+            }
+
+            ulong entry = (ulong)GetPtEntry(position);
+
+            // The not-modified bit is part of the flags mask, so it can be tested directly.
+            if ((entry & PteFlagNotModified) != 0)
+            {
+                ClearPtEntryFlag(position, PteFlagNotModified);
+            }
+
+            byte* pagePtr = (byte*)(entry & ~(ulong)PteFlagsMask);
+
+            long pageOffset = position & PageMask;
+
+            return new IntPtr(pagePtr + pageOffset);
+        }
+
+        /// <summary>Reads the raw page table entry (pointer plus flag bits) for an address.</summary>
+        private byte* GetPtEntry(long position)
+        {
+            byte** slot = (byte**)GetPtPtr(position);
+
+            return *slot;
+        }
+
+        /// <summary>
+        /// Stores page table entries for every page from <paramref name="va"/> up to the
+        /// page aligned end of the range. A non-null <paramref name="ptr"/> advances by
+        /// one page per entry; a null pointer is stored as-is for every page.
+        /// </summary>
+        private void SetPtEntries(long va, byte* ptr, long size)
+        {
+            long endPosition = (va + size + PageMask) & ~PageMask;
+
+            byte* pagePtr = ptr;
+
+            for (long pageVa = va; (ulong)pageVa < (ulong)endPosition; pageVa += PageSize)
+            {
+                SetPtEntry(pageVa, pagePtr);
+
+                if (pagePtr != null)
+                {
+                    pagePtr += PageSize;
+                }
+            }
+        }
+
+        /// <summary>Overwrites the page table entry for an address with the given pointer.</summary>
+        private void SetPtEntry(long position, byte* ptr)
+        {
+            byte** slot = (byte**)GetPtPtr(position);
+
+            *slot = ptr;
+        }
+
+        /// <summary>Sets the given flag bits on the page table entry for an address.</summary>
+        private void SetPtEntryFlag(long position, long flag) =>
+            ModifyPtEntryFlag(position, flag, setFlag: true);
+
+        /// <summary>Clears the given flag bits on the page table entry for an address.</summary>
+        private void ClearPtEntryFlag(long position, long flag) =>
+            ModifyPtEntryFlag(position, flag, setFlag: false);
+
+        /// <summary>
+        /// Sets or clears flag bits on the page table entry for an address, retrying a
+        /// compare-exchange until the entry is updated without a concurrent change.
+        /// </summary>
+        /// <param name="position">Virtual address whose entry is modified.</param>
+        /// <param name="flag">Flag bits to set or clear.</param>
+        /// <param name="setFlag">True to set the bits, false to clear them.</param>
+        private void ModifyPtEntryFlag(long position, long flag, bool setFlag)
+        {
+            // The entry location does not depend on the retry, so resolve it once.
+            IntPtr* ptPtr = GetPtPtr(position);
+
+            while (true)
+            {
+                IntPtr old = *ptPtr;
+
+                long modified = old.ToInt64();
+
+                if (setFlag)
+                {
+                    modified |= flag;
+                }
+                else
+                {
+                    modified &= ~flag;
+                }
+
+                IntPtr origValue = Interlocked.CompareExchange(ref *ptPtr, new IntPtr(modified), old);
+
+                // The exchange only happened if the entry still held the value we read.
+                if (origValue == old)
+                {
+                    break;
+                }
+            }
+        }
+
+        // Walks the page table and returns a pointer to the last level entry for
+        // the given address, allocating missing intermediate levels on the way.
+        // Throws ArgumentOutOfRangeException for addresses outside the address space.
+        private IntPtr* GetPtPtr(long position)
+        {
+            if (!IsValidPosition(position))
+            {
+                throw new ArgumentOutOfRangeException(nameof(position));
+            }
+
+            IntPtr nextPtr = _pageTable;
+
+            IntPtr* ptePtr = null;
+
+            // Lowest address bit used as index into the current level.
+            int bit = PageBits;
+
+            while (true)
+            {
+                long index = (position >> bit) & PtLevelMask;
+
+                ptePtr = &((IntPtr*)nextPtr)[index];
+
+                bit += PtLevelBits;
+
+                // All address bits were consumed, the current entry is the final one.
+                if (bit >= AddressSpaceBits)
+                {
+                    break;
+                }
+
+                nextPtr = *ptePtr;
+
+                if (nextPtr == IntPtr.Zero)
+                {
+                    // Entry does not yet exist, allocate a new one.
+                    IntPtr newPtr = Allocate((ulong)(PtLevelSize * IntPtr.Size));
+
+                    // Try to swap the current pointer (should be zero), with the allocated one.
+                    nextPtr = Interlocked.CompareExchange(ref *ptePtr, newPtr, IntPtr.Zero);
+
+                    // If the old pointer is not null, then another thread already has set it.
+                    if (nextPtr != IntPtr.Zero)
+                    {
+                        // Keep the table installed by the other thread, discard ours.
+                        Free(newPtr);
+                    }
+                    else
+                    {
+                        nextPtr = newPtr;
+                    }
+                }
+            }
+
+            return ptePtr;
+        }
+
+        // Checks whether any page of the region was written to since the last check.
+        // Without OS write watch support this defers to IsRegionModifiedFallback.
+        // Otherwise, pages whose host addresses are consecutive are grouped into
+        // runs, and each run is checked with a single GetModifiedPages call.
+        // NOTE(review): the page count is size / PageSize, so a size below one page
+        // checks nothing and returns false - confirm callers pass page multiples.
+        public bool IsRegionModified(long position, long size)
+        {
+            if (!HasWriteWatchSupport)
+            {
+                return IsRegionModifiedFallback(position, size);
+            }
+
+            IntPtr address = Translate(position);
+
+            // Start of the current run, and the host address the next page must
+            // have in order to extend the run.
+            IntPtr baseAddr     = address;
+            IntPtr expectedAddr = address;
+
+            // Number of pages in the current run that were not checked yet.
+            long pendingPages = 0;
+
+            long pages = size / PageSize;
+
+            bool modified = false;
+
+            // Checks the current run (baseAddr, pendingPages).
+            // A failed query is reported as modified.
+            bool IsAnyPageModified()
+            {
+                IntPtr pendingSize = new IntPtr(pendingPages * PageSize);
+
+                IntPtr[] addresses = new IntPtr[pendingPages];
+
+                bool result = GetModifiedPages(baseAddr, pendingSize, addresses, out ulong count);
+
+                if (result)
+                {
+                    return count != 0;
+                }
+                else
+                {
+                    return true;
+                }
+            }
+
+            while (pages-- > 0)
+            {
+                // The host address is not consecutive, check the run gathered so far and start a new one.
+                if (address != expectedAddr)
+                {
+                    modified |= IsAnyPageModified();
+
+                    baseAddr = address;
+
+                    pendingPages = 0;
+                }
+
+                expectedAddr = address + PageSize;
+
+                pendingPages++;
+
+                // Last page, avoid translating an address past the region.
+                if (pages == 0)
+                {
+                    break;
+                }
+
+                position += PageSize;
+
+                address = Translate(position);
+            }
+
+            // Check the last run.
+            if (pendingPages != 0)
+            {
+                modified |= IsAnyPageModified();
+            }
+
+            return modified;
+        }
+
+        /// <summary>
+        /// Software write tracking: reports whether any page of the region lacks the
+        /// not-modified flag, and sets the flag on those pages so that only later
+        /// writes (which clear it through TranslateWrite) are reported again.
+        /// </summary>
+        /// <param name="position">Virtual address of the start of the region.</param>
+        /// <param name="size">Size of the region, in bytes.</param>
+        /// <returns>True when at least one page was modified or lies outside the address space.</returns>
+        private unsafe bool IsRegionModifiedFallback(long position, long size)
+        {
+            long endAddr = (position + size + PageMask) & ~PageMask;
+
+            bool modified = false;
+
+            while ((ulong)position < (ulong)endAddr)
+            {
+                if (IsValidPosition(position))
+                {
+                    // Read the entry through the page table walk. Indexing the root table
+                    // directly with the page number is only valid for a flat page table,
+                    // and reads out of bounds when the table has multiple levels.
+                    ulong ptrUlong = (ulong)GetPtEntry(position);
+
+                    if ((ptrUlong & PteFlagNotModified) == 0)
+                    {
+                        modified = true;
+
+                        SetPtEntryFlag(position, PteFlagNotModified);
+                    }
+                }
+                else
+                {
+                    modified = true;
+                }
+
+                position += PageSize;
+            }
+
+            return modified;
+        }
+
+        /// <summary>
+        /// Gets the host pointer for a region, provided the region is contiguous in host memory.
+        /// </summary>
+        /// <param name="position">Virtual address of the start of the region.</param>
+        /// <param name="size">Size of the region, in bytes.</param>
+        /// <param name="ptr">Host pointer for <paramref name="position"/>, or zero on failure.</param>
+        /// <returns>True when the region is contiguous.</returns>
+        public bool TryGetHostAddress(long position, long size, out IntPtr ptr)
+        {
+            if (!IsContiguous(position, size))
+            {
+                ptr = IntPtr.Zero;
+
+                return false;
+            }
+
+            ptr = Translate(position);
+
+            return true;
+        }
+
+        /// <summary>
+        /// Checks that the physical addresses of all pages touched by the region
+        /// increase by exactly one page from each page to the next.
+        /// </summary>
+        private bool IsContiguous(long position, long size)
+        {
+            long endPos = position + size;
+
+            long pagePos = position & ~PageMask;
+
+            long expectedPa = GetPhysicalAddress(pagePos);
+
+            for (; (ulong)pagePos < (ulong)endPos; pagePos += PageSize, expectedPa += PageSize)
+            {
+                if (GetPhysicalAddress(pagePos) != expectedPa)
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Checks that an address lies inside the address space. The comparison is
+        /// unsigned, so negative values are rejected as well.
+        /// </summary>
+        public bool IsValidPosition(long position)
+        {
+            ulong address = (ulong)position;
+            ulong limit   = (ulong)AddressSpaceSize;
+
+            return address < limit;
+        }
+
+        /// <summary>Atomically loads a 128-bit value from a 16-byte aligned address.</summary>
+        /// <exception cref="InvalidOperationException">The address is not 16-byte aligned.</exception>
+        internal V128 AtomicLoadInt128(long position)
+        {
+            bool misaligned = (position & 0xf) != 0;
+
+            if (misaligned)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            // The write translation is used here, so the page is marked as modified.
+            IntPtr hostPtr = TranslateWrite(position);
+
+            return MemoryManagerPal.AtomicLoad128(hostPtr);
+        }
+
+        /// <summary>
+        /// Atomically replaces the byte at <paramref name="position"/> with
+        /// <paramref name="desired"/> if it currently equals <paramref name="expected"/>.
+        /// </summary>
+        /// <remarks>
+        /// The exchange is done on the 32-bit value starting at the address, with the upper
+        /// 24 bits taken from the current memory contents, so it also fails when one of the
+        /// three following bytes changes between the read and the exchange.
+        /// </remarks>
+        /// <returns>True when the value was exchanged.</returns>
+        internal bool AtomicCompareExchangeByte(long position, byte expected, byte desired)
+        {
+            // This may store to memory, so use the write translation (as the wider
+            // variants do), otherwise the page is never marked as modified.
+            int* ptr = (int*)TranslateWrite(position);
+
+            int currentValue = *ptr;
+
+            int expected32 = (currentValue & ~byte.MaxValue) | expected;
+            int desired32  = (currentValue & ~byte.MaxValue) | desired;
+
+            return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32;
+        }
+
+        /// <summary>
+        /// Atomically replaces the 16-bit value at <paramref name="position"/> with
+        /// <paramref name="desired"/> if it currently equals <paramref name="expected"/>.
+        /// </summary>
+        /// <remarks>
+        /// The exchange is done on the 32-bit value starting at the address, with the upper
+        /// 16 bits taken from the current memory contents, so it also fails when one of the
+        /// two following bytes changes between the read and the exchange.
+        /// </remarks>
+        /// <returns>True when the value was exchanged.</returns>
+        /// <exception cref="InvalidOperationException">The address is not 2-byte aligned.</exception>
+        internal bool AtomicCompareExchangeInt16(long position, short expected, short desired)
+        {
+            if ((position & 1) != 0)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            // This may store to memory, so use the write translation (as the wider
+            // variants do), otherwise the page is never marked as modified.
+            int* ptr = (int*)TranslateWrite(position);
+
+            int currentValue = *ptr;
+
+            int expected32 = (currentValue & ~ushort.MaxValue) | (ushort)expected;
+            int desired32  = (currentValue & ~ushort.MaxValue) | (ushort)desired;
+
+            return Interlocked.CompareExchange(ref *ptr, desired32, expected32) == expected32;
+        }
+
+        /// <summary>
+        /// Atomically replaces the 32-bit value at <paramref name="position"/> with
+        /// <paramref name="desired"/> if it currently equals <paramref name="expected"/>.
+        /// </summary>
+        /// <returns>True when the value was exchanged.</returns>
+        /// <exception cref="InvalidOperationException">The address is not 4-byte aligned.</exception>
+        public bool AtomicCompareExchangeInt32(long position, int expected, int desired)
+        {
+            bool misaligned = (position & 3) != 0;
+
+            if (misaligned)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            int* target = (int*)TranslateWrite(position);
+
+            int previous = Interlocked.CompareExchange(ref *target, desired, expected);
+
+            return previous == expected;
+        }
+
+        /// <summary>
+        /// Atomically replaces the 64-bit value at <paramref name="position"/> with
+        /// <paramref name="desired"/> if it currently equals <paramref name="expected"/>.
+        /// </summary>
+        /// <returns>True when the value was exchanged.</returns>
+        /// <exception cref="InvalidOperationException">The address is not 8-byte aligned.</exception>
+        internal bool AtomicCompareExchangeInt64(long position, long expected, long desired)
+        {
+            bool misaligned = (position & 7) != 0;
+
+            if (misaligned)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            long* target = (long*)TranslateWrite(position);
+
+            long previous = Interlocked.CompareExchange(ref *target, desired, expected);
+
+            return previous == expected;
+        }
+
+        /// <summary>
+        /// Atomically replaces the 128-bit value at <paramref name="position"/> with
+        /// <paramref name="desired"/> if it currently equals <paramref name="expected"/>.
+        /// </summary>
+        /// <returns>True when the value was exchanged.</returns>
+        /// <exception cref="InvalidOperationException">The address is not 16-byte aligned.</exception>
+        internal bool AtomicCompareExchangeInt128(long position, V128 expected, V128 desired)
+        {
+            bool misaligned = (position & 0xf) != 0;
+
+            if (misaligned)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            IntPtr target = TranslateWrite(position);
+
+            V128 previous = MemoryManagerPal.CompareAndSwap128(target, expected, desired);
+
+            return previous == expected;
+        }
+
+        /// <summary>Atomically increments the 32-bit value at a 4-byte aligned address.</summary>
+        /// <returns>The incremented value.</returns>
+        /// <exception cref="InvalidOperationException">The address is not 4-byte aligned.</exception>
+        public int AtomicIncrementInt32(long position)
+        {
+            bool misaligned = (position & 3) != 0;
+
+            if (misaligned)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            int* target = (int*)TranslateWrite(position);
+
+            int incremented = Interlocked.Increment(ref *target);
+
+            return incremented;
+        }
+
+        /// <summary>Atomically decrements the 32-bit value at a 4-byte aligned address.</summary>
+        /// <returns>The decremented value.</returns>
+        /// <exception cref="InvalidOperationException">The address is not 4-byte aligned.</exception>
+        public int AtomicDecrementInt32(long position)
+        {
+            bool misaligned = (position & 3) != 0;
+
+            if (misaligned)
+            {
+                AbortWithAlignmentFault(position);
+            }
+
+            int* target = (int*)TranslateWrite(position);
+
+            int decremented = Interlocked.Decrement(ref *target);
+
+            return decremented;
+        }
+
+        /// <summary>Throws the exception used to report an atomic access to a misaligned address.</summary>
+        /// <exception cref="InvalidOperationException">Always thrown.</exception>
+        private void AbortWithAlignmentFault(long position)
+        {
+            // TODO: Abort mode and exception support on the CPU.
+            string message = $"Tried to compare exchange a misaligned address 0x{position:X16}.";
+
+            throw new InvalidOperationException(message);
+        }
+
+        /// <summary>Reads a byte and reinterprets it as a signed 8-bit value.</summary>
+        public sbyte ReadSByte(long position) => (sbyte)ReadByte(position);
+
+        /// <summary>Reads a 16-bit value and reinterprets it as signed.</summary>
+        public short ReadInt16(long position) => (short)ReadUInt16(position);
+
+        /// <summary>Reads a 32-bit value and reinterprets it as signed.</summary>
+        public int ReadInt32(long position) => (int)ReadUInt32(position);
+
+        /// <summary>Reads a 64-bit value and reinterprets it as signed.</summary>
+        public long ReadInt64(long position) => (long)ReadUInt64(position);
+
+        /// <summary>Reads the byte at the host address that <paramref name="position"/> translates to.</summary>
+        public byte ReadByte(long position)
+        {
+            byte* source = (byte*)Translate(position);
+
+            return *source;
+        }
+
+        /// <summary>
+        /// Reads a 16-bit value. Aligned addresses are read with a single access,
+        /// unaligned ones are assembled from two byte reads (low byte first).
+        /// </summary>
+        public ushort ReadUInt16(long position)
+        {
+            if ((position & 1) != 0)
+            {
+                int low  = ReadByte(position);
+                int high = ReadByte(position + 1);
+
+                return (ushort)(low | (high << 8));
+            }
+
+            ushort* source = (ushort*)Translate(position);
+
+            return *source;
+        }
+
+        /// <summary>
+        /// Reads a 32-bit value. Aligned addresses are read with a single access,
+        /// unaligned ones are assembled from two 16-bit reads (low half first).
+        /// </summary>
+        public uint ReadUInt32(long position)
+        {
+            if ((position & 3) != 0)
+            {
+                int low  = ReadUInt16(position);
+                int high = ReadUInt16(position + 2);
+
+                return (uint)(low | (high << 16));
+            }
+
+            uint* source = (uint*)Translate(position);
+
+            return *source;
+        }
+
+        /// <summary>
+        /// Reads a 64-bit value. Aligned addresses are read with a single access,
+        /// unaligned ones are assembled from two 32-bit reads (low half first).
+        /// </summary>
+        public ulong ReadUInt64(long position)
+        {
+            if ((position & 7) != 0)
+            {
+                ulong low  = ReadUInt32(position);
+                ulong high = ReadUInt32(position + 4);
+
+                return low | (high << 32);
+            }
+
+            ulong* source = (ulong*)Translate(position);
+
+            return *source;
+        }
+
+        /// <summary>
+        /// Reads a 128-bit vector as two 64-bit values: the first from
+        /// <paramref name="position"/>, the second from 8 bytes after it.
+        /// </summary>
+        public V128 ReadVector128(long position)
+        {
+            ulong low  = ReadUInt64(position);
+            ulong high = ReadUInt64(position + 8);
+
+            return new V128(low, high);
+        }
+
+        /// <summary>
+        /// Reads <paramref name="size"/> bytes starting at <paramref name="position"/> into a
+        /// new array. The copy is split at page boundaries, each page being translated separately.
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The size is negative or above <see cref="int.MaxValue"/>, or the end address wraps around.
+        /// </exception>
+        public byte[] ReadBytes(long position, long size)
+        {
+            long endAddr = position + size;
+
+            if ((ulong)size > int.MaxValue)
+            {
+                throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            if ((ulong)endAddr < (ulong)position)
+            {
+                throw new ArgumentOutOfRangeException(nameof(position));
+            }
+
+            byte[] result = new byte[size];
+
+            for (int written = 0; (ulong)position < (ulong)endAddr;)
+            {
+                // End of the current chunk: the next page boundary, or the end of the region.
+                long nextPage = (position + PageSize) & ~(long)PageMask;
+
+                long chunkEnd = (ulong)nextPage > (ulong)endAddr ? endAddr : nextPage;
+
+                int chunkSize = (int)(chunkEnd - position);
+
+                Marshal.Copy(Translate(position), result, written, chunkSize);
+
+                position += chunkSize;
+                written  += chunkSize;
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Reads <paramref name="size"/> bytes starting at <paramref name="position"/> into
+        /// <paramref name="data"/>, beginning at <paramref name="startIndex"/>. The copy is
+        /// split at page boundaries, each page being translated separately.
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// The size is negative, or the end address wraps around.
+        /// </exception>
+        public void ReadBytes(long position, byte[] data, int startIndex, int size)
+        {
+            // Note: This will be moved later.
+            long endAddr = position + size;
+
+            if ((ulong)size > int.MaxValue)
+            {
+                throw new ArgumentOutOfRangeException(nameof(size));
+            }
+
+            if ((ulong)endAddr < (ulong)position)
+            {
+                throw new ArgumentOutOfRangeException(nameof(position));
+            }
+
+            for (int destIndex = startIndex; (ulong)position < (ulong)endAddr;)
+            {
+                // End of the current chunk: the next page boundary, or the end of the region.
+                long nextPage = (position + PageSize) & ~(long)PageMask;
+
+                long chunkEnd = (ulong)nextPage > (ulong)endAddr ? endAddr : nextPage;
+
+                int chunkSize = (int)(chunkEnd - position);
+
+                Marshal.Copy(Translate(position), data, destIndex, chunkSize);
+
+                position  += chunkSize;
+                destIndex += chunkSize;
+            }
+        }
+
+        /// <summary>Writes a signed 8-bit value as its unsigned bit pattern.</summary>
+        public void WriteSByte(long position, sbyte value) => WriteByte(position, (byte)value);
+
+        /// <summary>Writes a signed 16-bit value as its unsigned bit pattern.</summary>
+        public void WriteInt16(long position, short value) => WriteUInt16(position, (ushort)value);
+
+        /// <summary>Writes a signed 32-bit value as its unsigned bit pattern.</summary>
+        public void WriteInt32(long position, int value) => WriteUInt32(position, (uint)value);
+
+        /// <summary>Writes a signed 64-bit value as its unsigned bit pattern.</summary>
+        public void WriteInt64(long position, long value) => WriteUInt64(position, (ulong)value);
+
+        /// <summary>
+        /// Writes a byte to the host address that <paramref name="position"/> translates to,
+        /// using the write translation so the page is marked as modified.
+        /// </summary>
+        public void WriteByte(long position, byte value)
+        {
+            byte* destination = (byte*)TranslateWrite(position);
+
+            *destination = value;
+        }
+
+        /// <summary>
+        /// Writes a 16-bit value. Aligned addresses are written with a single access,
+        /// unaligned ones as two byte writes (low byte first).
+        /// </summary>
+        public void WriteUInt16(long position, ushort value)
+        {
+            if ((position & 1) != 0)
+            {
+                WriteByte(position,     (byte)value);
+                WriteByte(position + 1, (byte)(value >> 8));
+
+                return;
+            }
+
+            ushort* destination = (ushort*)TranslateWrite(position);
+
+            *destination = value;
+        }
+
+        /// <summary>
+        /// Writes a 32-bit value. Aligned addresses are written with a single access,
+        /// unaligned ones as two 16-bit writes (low half first).
+        /// </summary>
+        public void WriteUInt32(long position, uint value)
+        {
+            if ((position & 3) != 0)
+            {
+                WriteUInt16(position,     (ushort)value);
+                WriteUInt16(position + 2, (ushort)(value >> 16));
+
+                return;
+            }
+
+            uint* destination = (uint*)TranslateWrite(position);
+
+            *destination = value;
+        }
+
+        public void WriteUInt64(long position, ulong value)
+        {
+            if ((position & 7) != 0)
+            {
+                WriteUInt32(position + 0, (uint)(value >> 0));  // Not 8-byte aligned: write the low half first...
+                WriteUInt32(position + 4, (uint)(value >> 32)); // ...then the high half.
+            }
+            else
+            {
+                *((ulong*)TranslateWrite(position)) = value; // 8-byte aligned: one 64-bit store.
+            }
+        }
+
+        public void WriteVector128(long position, V128 value)
+        {
+            WriteUInt64(position + 0, value.GetUInt64(0)); // 64-bit element 0 goes to position...
+            WriteUInt64(position + 8, value.GetUInt64(1)); // ...and element 1 to the 8 bytes after it.
+        }
+
+        public void WriteBytes(long position, byte[] data) // Copies all of data to the memory at position, chunk by chunk.
+        {
+            long endAddr = position + data.Length;
+
+            if ((ulong)endAddr < (ulong)position) // The end wrapped around the unsigned address range.
+            {
+                throw new ArgumentOutOfRangeException(nameof(position));
+            }
+
+            int offset = 0;
+
+            while ((ulong)position < (ulong)endAddr)
+            {
+                long pageLimit = (position + PageSize) & ~(long)PageMask; // Presumably the start of the next page (assumes PageMask == PageSize - 1) - confirm.
+
+                if ((ulong)pageLimit > (ulong)endAddr)
+                {
+                    pageLimit = endAddr; // Never copy past the requested end.
+                }
+
+                int copySize = (int)(pageLimit - position);
+
+                Marshal.Copy(data, offset, TranslateWrite(position), copySize); // Each chunk gets its own translation.
+
+                position += copySize;
+                offset   += copySize;
+            }
+        }
+
+        public void WriteBytes(long position, byte[] data, int startIndex, int size)
+        {
+            // Copies size bytes of data, starting at startIndex, to the memory at position. Note: This will be moved later.
+            long endAddr = position + size;
+
+            if ((ulong)endAddr < (ulong)position) // The end wrapped around the unsigned address range.
+            {
+                throw new ArgumentOutOfRangeException(nameof(position));
+            }
+
+            int offset = startIndex;
+
+            while ((ulong)position < (ulong)endAddr)
+            {
+                long pageLimit = (position + PageSize) & ~(long)PageMask; // Presumably the start of the next page (assumes PageMask == PageSize - 1) - confirm.
+
+                if ((ulong)pageLimit > (ulong)endAddr)
+                {
+                    pageLimit = endAddr; // Never copy past the requested end.
+                }
+
+                int copySize = (int)(pageLimit - position);
+
+                Marshal.Copy(data, offset, Translate(position), copySize); // NOTE(review): Translate here, TranslateWrite in the two-argument overload - confirm this is intended.
+
+                position += copySize;
+                offset   += copySize;
+            }
+        }
+
+        public void CopyBytes(long src, long dst, long size)
+        {
+            // Copies size bytes from src to dst. Note: This will be moved later.
+            if (IsContiguous(src, size) &&
+                IsContiguous(dst, size))
+            {
+                byte* srcPtr = (byte*)Translate(src);
+                byte* dstPtr = (byte*)Translate(dst);
+
+                Buffer.MemoryCopy(srcPtr, dstPtr, size, size); // Both ranges passed IsContiguous: one bulk copy.
+            }
+            else
+            {
+                WriteBytes(dst, ReadBytes(src, size)); // Otherwise read the source into an array and write that out.
+            }
+        }
+
+        public void Dispose()
+        {
+            Dispose(true); // Forwards to Dispose(bool), which frees the page table.
+        }
+
+        protected virtual void Dispose(bool disposing) // The disposing argument is not consulted.
+        {
+            IntPtr ptr = Interlocked.Exchange(ref _pageTable, IntPtr.Zero); // Take the root pointer and clear the field in one atomic step.
+
+            if (ptr != IntPtr.Zero) // Zero means there is no table left to free.
+            {
+                FreePageTableEntry(ptr, PageBits);
+            }
+        }
+
+        private void FreePageTableEntry(IntPtr ptr, int levelBitEnd) // Frees the table at ptr and, recursively, the tables its entries point to.
+        {
+            levelBitEnd += PtLevelBits;
+
+            if (levelBitEnd >= AddressSpaceBits) // Last level reached: free this table without walking its entries.
+            {
+                Free(ptr);
+
+                return;
+            }
+
+            for (int index = 0; index < PtLevelSize; index++)
+            {
+                IntPtr ptePtr = ((IntPtr*)ptr)[index];
+
+                if (ptePtr != IntPtr.Zero) // Zero entries are skipped.
+                {
+                    FreePageTableEntry(ptePtr, levelBitEnd);
+                }
+            }
+
+            Free(ptr); // Children first, then the table that pointed at them.
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryManagerPal.cs b/ARMeilleure/Memory/MemoryManagerPal.cs
new file mode 100644
index 000000000..64191a0ac
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryManagerPal.cs
@@ -0,0 +1,80 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+
+namespace ARMeilleure.Memory
+{
+    // 128-bit atomic memory helpers backed by a routine that is compiled on first use and then cached.
+    static class MemoryManagerPal
+    {
+        private delegate V128 CompareExchange128(IntPtr address, V128 expected, V128 desired);
+
+        // Compiled routine; null until the first request.
+        private static CompareExchange128 _compareExchange128;
+
+        // Held while the routine is being compiled.
+        private static object _lock;
+
+        static MemoryManagerPal()
+        {
+            _lock = new object();
+        }
+
+        // Runs the compiled routine with zero as both the expected and the desired value and returns its result.
+        public static V128 AtomicLoad128(IntPtr address)
+        {
+            CompareExchange128 routine = GetCompareAndSwap128();
+
+            return routine(address, V128.Zero, V128.Zero);
+        }
+
+        // Runs the compiled routine with the given operands and returns its result.
+        public static V128 CompareAndSwap128(IntPtr address, V128 expected, V128 desired)
+        {
+            CompareExchange128 routine = GetCompareAndSwap128();
+
+            return routine(address, expected, desired);
+        }
+
+        // Returns the cached routine, generating it first when it does not exist yet.
+        private static CompareExchange128 GetCompareAndSwap128()
+        {
+            if (_compareExchange128 != null)
+            {
+                return _compareExchange128;
+            }
+
+            GenerateCompareAndSwap128();
+
+            return _compareExchange128;
+        }
+
+        // Emits IR for a function taking (address, expected, desired), compiles it and stores the delegate.
+        private static void GenerateCompareAndSwap128()
+        {
+            lock (_lock)
+            {
+                // Somebody else may have stored the delegate while this thread waited for the lock.
+                if (_compareExchange128 == null)
+                {
+                    EmitterContext emitter = new EmitterContext();
+
+                    Operand addressArg  = emitter.LoadArgument(OperandType.I64,  0);
+                    Operand expectedArg = emitter.LoadArgument(OperandType.V128, 1);
+                    Operand desiredArg  = emitter.LoadArgument(OperandType.V128, 2);
+
+                    emitter.Return(emitter.CompareAndSwap128(addressArg, expectedArg, desiredArg));
+
+                    OperandType[] parameterTypes = { OperandType.I64, OperandType.V128, OperandType.V128 };
+
+                    _compareExchange128 = Compiler.Compile<CompareExchange128>(
+                        emitter.GetControlFlowGraph(),
+                        parameterTypes,
+                        OperandType.V128,
+                        CompilerOptions.HighCq);
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryProtection.cs b/ARMeilleure/Memory/MemoryProtection.cs
new file mode 100644
index 000000000..6bc16f8ea
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryProtection.cs
@@ -0,0 +1,17 @@
+using System;
+
+namespace ARMeilleure.Memory
+{
+    [Flags]
+    public enum MemoryProtection // Bit set of access rights; members combine with |.
+    {
+        None    = 0,      // No bit set.
+        Read    = 1 << 0, // Bit 0.
+        Write   = 1 << 1, // Bit 1.
+        Execute = 1 << 2, // Bit 2.
+
+        ReadAndWrite     = Read | Write,          // Named combinations of the single bits above.
+        ReadAndExecute   = Read | Execute,
+        ReadWriteExecute = Read | Write | Execute
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Memory/MemoryProtectionException.cs b/ARMeilleure/Memory/MemoryProtectionException.cs
new file mode 100644
index 000000000..6313ce6a1
--- /dev/null
+++ b/ARMeilleure/Memory/MemoryProtectionException.cs
@@ -0,0 +1,13 @@
+using System;
+
+namespace ARMeilleure.Memory
+{
+    // Exception whose message reports that setting the given memory protection failed.
+    class MemoryProtectionException : Exception
+    {
+        public MemoryProtectionException(MemoryProtection protection)
+            : base($"Failed to set memory protection to \"{protection}\".")
+        {
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Optimizations.cs b/ARMeilleure/Optimizations.cs
new file mode 100644
index 000000000..0b9885dc9
--- /dev/null
+++ b/ARMeilleure/Optimizations.cs
@@ -0,0 +1,33 @@
+using ARMeilleure.CodeGen.X86;
+
+namespace ARMeilleure
+{
+    public static class Optimizations // Process-wide switches; every settable flag below defaults to true.
+    {
+        public static bool AssumeStrictAbiCompliance { get; set; } = true;
+
+        public static bool FastFP { get; set; } = true;
+
+        public static bool UseSseIfAvailable    { get; set; } = true; // Opt-in flags; each is combined with a host capability check below.
+        public static bool UseSse2IfAvailable   { get; set; } = true;
+        public static bool UseSse3IfAvailable   { get; set; } = true;
+        public static bool UseSsse3IfAvailable  { get; set; } = true;
+        public static bool UseSse41IfAvailable  { get; set; } = true;
+        public static bool UseSse42IfAvailable  { get; set; } = true;
+        public static bool UsePopCntIfAvailable { get; set; } = true;
+
+        public static bool ForceLegacySse // Pass-through to HardwareCapabilities.ForceLegacySse.
+        {
+            get => HardwareCapabilities.ForceLegacySse;
+            set => HardwareCapabilities.ForceLegacySse = value;
+        }
+
+        internal static bool UseSse    => UseSseIfAvailable    && HardwareCapabilities.SupportsSse; // Effective value: opt-in AND reported by HardwareCapabilities.
+        internal static bool UseSse2   => UseSse2IfAvailable   && HardwareCapabilities.SupportsSse2;
+        internal static bool UseSse3   => UseSse3IfAvailable   && HardwareCapabilities.SupportsSse3;
+        internal static bool UseSsse3  => UseSsse3IfAvailable  && HardwareCapabilities.SupportsSsse3;
+        internal static bool UseSse41  => UseSse41IfAvailable  && HardwareCapabilities.SupportsSse41;
+        internal static bool UseSse42  => UseSse42IfAvailable  && HardwareCapabilities.SupportsSse42;
+        internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt;
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/Aarch32Mode.cs b/ARMeilleure/State/Aarch32Mode.cs
new file mode 100644
index 000000000..395e288aa
--- /dev/null
+++ b/ARMeilleure/State/Aarch32Mode.cs
@@ -0,0 +1,15 @@
+namespace ARMeilleure.State
+{
+    enum Aarch32Mode // Five-bit mode values. NOTE(review): presumably the AArch32 processor mode field encodings - confirm against the ARM reference manual.
+    {
+        User       = 0b10000,
+        Fiq        = 0b10001,
+        Irq        = 0b10010,
+        Supervisor = 0b10011,
+        Monitor    = 0b10110,
+        Abort      = 0b10111,
+        Hypervisor = 0b11010,
+        Undefined  = 0b11011,
+        System     = 0b11111
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/ExecutionContext.cs b/ARMeilleure/State/ExecutionContext.cs
new file mode 100644
index 000000000..22cfcb694
--- /dev/null
+++ b/ARMeilleure/State/ExecutionContext.cs
@@ -0,0 +1,176 @@
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.State
+{
+    // CPU state wrapper: register and flag accessors forward to a NativeContext, while system
+    // register values, the tick counter and the exception events are kept on this object.
+    public class ExecutionContext : IExecutionContext
+    {
+        // Value the native counter is armed with by the constructor and re-armed with by CheckInterrupt.
+        private const int MinCountForCheck = 40000;
+
+        // Stopwatch shared by every context; started once by the static constructor.
+        private static Stopwatch _tickCounter;
+
+        // Seconds per stopwatch tick (1 / Stopwatch.Frequency).
+        private static double _hostTickFreq;
+
+        private NativeContext _nativeContext;
+
+        // Set by RequestInterrupt, consumed by CheckInterrupt.
+        private bool _interrupted;
+
+        internal IntPtr NativeContextPtr => _nativeContext.BasePtr;
+
+        public uint CtrEl0   => 0x8444c004;
+        public uint DczidEl0 => 0x00000004;
+
+        public ulong CntfrqEl0 { get; set; }
+
+        // Elapsed stopwatch time in seconds multiplied by CntfrqEl0, truncated to an integer.
+        public ulong CntpctEl0
+        {
+            get
+            {
+                double elapsedSeconds = _tickCounter.ElapsedTicks * _hostTickFreq;
+
+                return (ulong)(elapsedSeconds * CntfrqEl0);
+            }
+        }
+
+        public long TpidrEl0 { get; set; }
+        public long Tpidr    { get; set; }
+
+        public FPCR Fpcr { get; set; }
+        public FPSR Fpsr { get; set; }
+
+        public bool IsAarch32 { get; set; }
+
+        // Aarch64 unless IsAarch32 is set; in that case the T flag picks Thumb over Arm.
+        internal ExecutionMode ExecutionMode
+        {
+            get
+            {
+                if (!IsAarch32)
+                {
+                    return ExecutionMode.Aarch64;
+                }
+
+                bool thumb = GetPstateFlag(PState.TFlag);
+
+                return thumb ? ExecutionMode.Aarch32Thumb : ExecutionMode.Aarch32Arm;
+            }
+        }
+
+        public bool Running { get; set; }
+
+        public event EventHandler<EventArgs>              Interrupt;
+        public event EventHandler<InstExceptionEventArgs> Break;
+        public event EventHandler<InstExceptionEventArgs> SupervisorCall;
+        public event EventHandler<InstUndefinedEventArgs> Undefined;
+
+        static ExecutionContext()
+        {
+            _hostTickFreq = 1.0 / Stopwatch.Frequency;
+
+            _tickCounter = Stopwatch.StartNew();
+        }
+
+        public ExecutionContext()
+        {
+            _nativeContext = new NativeContext();
+
+            Running = true;
+
+            _nativeContext.SetCounter(MinCountForCheck);
+        }
+
+        public ulong GetX(int index)
+        {
+            return _nativeContext.GetX(index);
+        }
+
+        public void SetX(int index, ulong value)
+        {
+            _nativeContext.SetX(index, value);
+        }
+
+        public V128 GetV(int index)
+        {
+            return _nativeContext.GetV(index);
+        }
+
+        public void SetV(int index, V128 value)
+        {
+            _nativeContext.SetV(index, value);
+        }
+
+        public bool GetPstateFlag(PState flag)
+        {
+            return _nativeContext.GetPstateFlag(flag);
+        }
+
+        public void SetPstateFlag(PState flag, bool value)
+        {
+            _nativeContext.SetPstateFlag(flag, value);
+        }
+
+        // Raises Interrupt if a request is pending (clearing it first), then always re-arms the counter.
+        internal void CheckInterrupt()
+        {
+            bool pending = _interrupted;
+
+            if (pending)
+            {
+                _interrupted = false;
+
+                Interrupt?.Invoke(this, EventArgs.Empty);
+            }
+
+            _nativeContext.SetCounter(MinCountForCheck);
+        }
+
+        // Only records the request; the Interrupt event fires from the next CheckInterrupt call.
+        public void RequestInterrupt()
+        {
+            _interrupted = true;
+        }
+
+        internal void OnBreak(ulong address, int imm)
+        {
+            EventHandler<InstExceptionEventArgs> handler = Break;
+
+            if (handler != null)
+            {
+                handler(this, new InstExceptionEventArgs(address, imm));
+            }
+        }
+
+        internal void OnSupervisorCall(ulong address, int imm)
+        {
+            EventHandler<InstExceptionEventArgs> handler = SupervisorCall;
+
+            if (handler != null)
+            {
+                handler(this, new InstExceptionEventArgs(address, imm));
+            }
+        }
+
+        internal void OnUndefined(ulong address, int opCode)
+        {
+            EventHandler<InstUndefinedEventArgs> handler = Undefined;
+
+            if (handler != null)
+            {
+                handler(this, new InstUndefinedEventArgs(address, opCode));
+            }
+        }
+
+        // Releases the NativeContext that backs the registers.
+        public void Dispose()
+        {
+            _nativeContext.Dispose();
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/ExecutionMode.cs b/ARMeilleure/State/ExecutionMode.cs
new file mode 100644
index 000000000..eaed9d27f
--- /dev/null
+++ b/ARMeilleure/State/ExecutionMode.cs
@@ -0,0 +1,9 @@
+namespace ARMeilleure.State
+{
+    enum ExecutionMode // Result type of ExecutionContext.ExecutionMode.
+    {
+        Aarch32Arm,   // Reported when IsAarch32 is set and the T flag is clear.
+        Aarch32Thumb, // Reported when IsAarch32 is set and the T flag is set.
+        Aarch64       // Reported when IsAarch32 is clear.
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/FPCR.cs b/ARMeilleure/State/FPCR.cs
new file mode 100644
index 000000000..511681fa9
--- /dev/null
+++ b/ARMeilleure/State/FPCR.cs
@@ -0,0 +1,28 @@
+using System;
+
+namespace ARMeilleure.State
+{
+    // Single-bit masks within the FPCR value.
+    [Flags]
+    public enum FPCR
+    {
+        Ufe = 1 << 11,
+        Fz  = 1 << 24,
+        Dn  = 1 << 25,
+        Ahp = 1 << 26
+    }
+
+    public static class FPCRExtensions
+    {
+        // Bit position where the two-bit rounding mode field starts.
+        private const int RModeShift = 22;
+
+        // Extracts bits 22 and 23 of fpcr and returns them as an FPRoundingMode.
+        public static FPRoundingMode GetRoundingMode(this FPCR fpcr)
+        {
+            int rmodeField = ((int)fpcr >> RModeShift) & 3;
+
+            return (FPRoundingMode)rmodeField;
+        }
+    }
+}
diff --git a/ARMeilleure/State/FPException.cs b/ARMeilleure/State/FPException.cs
new file mode 100644
index 000000000..e24e07af1
--- /dev/null
+++ b/ARMeilleure/State/FPException.cs
@@ -0,0 +1,12 @@
+namespace ARMeilleure.State
+{
+    enum FPException // NOTE(review): the values look like exception-flag bit positions in FPSR - confirm.
+    {
+        InvalidOp    = 0,
+        DivideByZero = 1,
+        Overflow     = 2,
+        Underflow    = 3,
+        Inexact      = 4,
+        InputDenorm  = 7 // 5 and 6 are not assigned here.
+    }
+}
diff --git a/ARMeilleure/State/FPRoundingMode.cs b/ARMeilleure/State/FPRoundingMode.cs
new file mode 100644
index 000000000..ee4f87668
--- /dev/null
+++ b/ARMeilleure/State/FPRoundingMode.cs
@@ -0,0 +1,10 @@
+namespace ARMeilleure.State
+{
+    public enum FPRoundingMode // Two-bit value; FPCRExtensions.GetRoundingMode casts FPCR bits 22-23 to this type.
+    {
+        ToNearest            = 0,
+        TowardsPlusInfinity  = 1,
+        TowardsMinusInfinity = 2,
+        TowardsZero          = 3
+    }
+}
diff --git a/ARMeilleure/State/FPSR.cs b/ARMeilleure/State/FPSR.cs
new file mode 100644
index 000000000..c20dc4393
--- /dev/null
+++ b/ARMeilleure/State/FPSR.cs
@@ -0,0 +1,11 @@
+using System;
+
+namespace ARMeilleure.State
+{
+    [Flags]
+    public enum FPSR // Single-bit masks within the FPSR value.
+    {
+        Ufc = 1 << 3, // Bit 3.
+        Qc  = 1 << 27 // Bit 27.
+    }
+}
diff --git a/ARMeilleure/State/FPType.cs b/ARMeilleure/State/FPType.cs
new file mode 100644
index 000000000..84e0db8da
--- /dev/null
+++ b/ARMeilleure/State/FPType.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.State
+{
+    enum FPType // Classification of a floating-point value.
+    {
+        Nonzero,
+        Zero,
+        Infinity,
+        QNaN, // NOTE(review): presumably a quiet NaN...
+        SNaN  // ...and a signaling NaN - confirm.
+    }
+}
diff --git a/ARMeilleure/State/IExecutionContext.cs b/ARMeilleure/State/IExecutionContext.cs
new file mode 100644
index 000000000..df91b7a1e
--- /dev/null
+++ b/ARMeilleure/State/IExecutionContext.cs
@@ -0,0 +1,37 @@
+using System;
+
+namespace ARMeilleure.State
+{
+    public interface IExecutionContext : IDisposable // Public surface of an execution context; implemented by ExecutionContext.
+    {
+        uint CtrEl0   { get; }
+        uint DczidEl0 { get; }
+
+        ulong CntfrqEl0 { get; set; }
+        ulong CntpctEl0 { get; } // Read-only.
+
+        long TpidrEl0 { get; set; }
+        long Tpidr    { get; set; }
+
+        FPCR Fpcr { get; set; }
+        FPSR Fpsr { get; set; }
+
+        bool IsAarch32 { get; set; }
+
+        bool Running { get; set; }
+
+        event EventHandler<EventArgs>              Interrupt;
+        event EventHandler<InstExceptionEventArgs> Break;
+        event EventHandler<InstExceptionEventArgs> SupervisorCall;
+        event EventHandler<InstUndefinedEventArgs> Undefined;
+
+        ulong GetX(int index);
+        void  SetX(int index, ulong value);
+
+        V128 GetV(int index); // The interface declares no matching SetV.
+
+        bool GetPstateFlag(PState flag); // The interface declares no matching SetPstateFlag.
+
+        void RequestInterrupt();
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/InstExceptionEventArgs.cs b/ARMeilleure/State/InstExceptionEventArgs.cs
new file mode 100644
index 000000000..c2460e4b4
--- /dev/null
+++ b/ARMeilleure/State/InstExceptionEventArgs.cs
@@ -0,0 +1,20 @@
+using System;
+
+namespace ARMeilleure.State
+{
+    // Event payload carrying an address and an integer id, both fixed at construction.
+    public class InstExceptionEventArgs : EventArgs
+    {
+        private readonly ulong _address;
+        private readonly int   _id;
+
+        public ulong Address => _address;
+        public int   Id      => _id;
+
+        public InstExceptionEventArgs(ulong address, int id)
+        {
+            _address = address;
+            _id      = id;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/InstUndefinedEventArgs.cs b/ARMeilleure/State/InstUndefinedEventArgs.cs
new file mode 100644
index 000000000..c02b648e1
--- /dev/null
+++ b/ARMeilleure/State/InstUndefinedEventArgs.cs
@@ -0,0 +1,20 @@
+using System;
+
+namespace ARMeilleure.State
+{
+    // Event payload carrying an address and an integer opcode, both fixed at construction.
+    public class InstUndefinedEventArgs : EventArgs
+    {
+        private readonly ulong _address;
+        private readonly int   _opCode;
+
+        public ulong Address => _address;
+        public int   OpCode  => _opCode;
+
+        public InstUndefinedEventArgs(ulong address, int opCode)
+        {
+            _address = address;
+            _opCode  = opCode;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/NativeContext.cs b/ARMeilleure/State/NativeContext.cs
new file mode 100644
index 000000000..4e6a5302f
--- /dev/null
+++ b/ARMeilleure/State/NativeContext.cs
@@ -0,0 +1,179 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.State
+{
+    // Register file stored in one block obtained from MemoryManagement.Allocate. Layout, in order:
+    // integer registers (8 bytes each), vector registers (16 bytes each), flags (4 bytes each),
+    // then a 4-byte counter.
+    class NativeContext : IDisposable
+    {
+        private const int IntSize   = 8;
+        private const int VecSize   = 16;
+        private const int FlagSize  = 4;
+        private const int ExtraSize = 4;
+
+        private const int TotalSize = RegisterConsts.IntRegsCount * IntSize  +
+                                      RegisterConsts.VecRegsCount * VecSize  +
+                                      RegisterConsts.FlagsCount   * FlagSize + ExtraSize;
+
+        // Address of the first byte of the block.
+        public IntPtr BasePtr { get; }
+
+        public NativeContext()
+        {
+            BasePtr = MemoryManagement.Allocate(TotalSize);
+        }
+
+        // Reads integer register index. Throws ArgumentOutOfRangeException when index is not in
+        // [0, RegisterConsts.IntRegsCount).
+        public ulong GetX(int index)
+        {
+            if ((uint)index >= RegisterConsts.IntRegsCount)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            return (ulong)Marshal.ReadInt64(BasePtr, index * IntSize);
+        }
+
+        // Writes integer register index. Throws ArgumentOutOfRangeException when index is not in
+        // [0, RegisterConsts.IntRegsCount).
+        public void SetX(int index, ulong value)
+        {
+            if ((uint)index >= RegisterConsts.IntRegsCount)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            Marshal.WriteInt64(BasePtr, index * IntSize, (long)value);
+        }
+
+        // Reads vector register index as two 64-bit halves. Throws ArgumentOutOfRangeException
+        // when index is not in [0, RegisterConsts.VecRegsCount).
+        public V128 GetV(int index)
+        {
+            // The vector bank has its own size; bound the index by it rather than by the integer bank.
+            if ((uint)index >= RegisterConsts.VecRegsCount)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            int offset = RegisterConsts.IntRegsCount * IntSize + index * VecSize;
+
+            return new V128(
+                Marshal.ReadInt64(BasePtr, offset + 0),
+                Marshal.ReadInt64(BasePtr, offset + 8));
+        }
+
+        // Writes vector register index as two 64-bit halves. Throws ArgumentOutOfRangeException
+        // when index is not in [0, RegisterConsts.VecRegsCount).
+        public void SetV(int index, V128 value)
+        {
+            // Same bound as GetV: the vector register count, not the integer register count.
+            if ((uint)index >= RegisterConsts.VecRegsCount)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
+
+            int offset = RegisterConsts.IntRegsCount * IntSize + index * VecSize;
+
+            Marshal.WriteInt64(BasePtr, offset + 0, value.GetInt64(0));
+            Marshal.WriteInt64(BasePtr, offset + 8, value.GetInt64(1));
+        }
+
+        // Returns true when the 32-bit slot of flag is non-zero. The enum value is used as the slot
+        // index; throws ArgumentException when it is not in [0, RegisterConsts.FlagsCount).
+        public bool GetPstateFlag(PState flag)
+        {
+            if ((uint)flag >= RegisterConsts.FlagsCount)
+            {
+                throw new ArgumentException($"Invalid flag \"{flag}\" specified.");
+            }
+
+            int offset =
+                RegisterConsts.IntRegsCount * IntSize +
+                RegisterConsts.VecRegsCount * VecSize + (int)flag * FlagSize;
+
+            int value = Marshal.ReadInt32(BasePtr, offset);
+
+            return value != 0;
+        }
+
+        // Stores 1 or 0 into the 32-bit slot of flag. Throws ArgumentException when the enum value
+        // is not in [0, RegisterConsts.FlagsCount).
+        public void SetPstateFlag(PState flag, bool value)
+        {
+            if ((uint)flag >= RegisterConsts.FlagsCount)
+            {
+                throw new ArgumentException($"Invalid flag \"{flag}\" specified.");
+            }
+
+            int offset =
+                RegisterConsts.IntRegsCount * IntSize +
+                RegisterConsts.VecRegsCount * VecSize + (int)flag * FlagSize;
+
+            Marshal.WriteInt32(BasePtr, offset, value ? 1 : 0);
+        }
+
+        public int GetCounter()
+        {
+            return Marshal.ReadInt32(BasePtr, GetCounterOffset());
+        }
+
+        public void SetCounter(int value)
+        {
+            Marshal.WriteInt32(BasePtr, GetCounterOffset(), value);
+        }
+
+        // Byte offset of reg from BasePtr. Any register type other than Integer or Vector is
+        // treated as a flag. Throws ArgumentException when the slot would end past TotalSize.
+        public static int GetRegisterOffset(Register reg)
+        {
+            int offset, size;
+
+            if (reg.Type == RegisterType.Integer)
+            {
+                offset = reg.Index * IntSize;
+
+                size = IntSize;
+            }
+            else if (reg.Type == RegisterType.Vector)
+            {
+                offset = RegisterConsts.IntRegsCount * IntSize + reg.Index * VecSize;
+
+                size = VecSize;
+            }
+            else /* if (reg.Type == RegisterType.Flag) */
+            {
+                offset = RegisterConsts.IntRegsCount * IntSize +
+                         RegisterConsts.VecRegsCount * VecSize + reg.Index * FlagSize;
+
+                size = FlagSize;
+            }
+
+            if ((uint)(offset + size) > (uint)TotalSize)
+            {
+                throw new ArgumentException("Invalid register.");
+            }
+
+            return offset;
+        }
+
+        // Byte offset of the counter, which sits right after the last flag slot.
+        public static int GetCounterOffset()
+        {
+            return RegisterConsts.IntRegsCount * IntSize +
+                   RegisterConsts.VecRegsCount * VecSize +
+                   RegisterConsts.FlagsCount   * FlagSize;
+        }
+
+        // Returns the block to MemoryManagement.Free.
+        public void Dispose()
+        {
+            MemoryManagement.Free(BasePtr);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/PState.cs b/ARMeilleure/State/PState.cs
new file mode 100644
index 000000000..ce755e952
--- /dev/null
+++ b/ARMeilleure/State/PState.cs
@@ -0,0 +1,16 @@
+using System;
+
+namespace ARMeilleure.State
+{
+    // Values are bit positions, used by NativeContext as flag slot indices; they are not bit masks, so no [Flags].
+    public enum PState
+    {
+        TFlag = 5,
+        EFlag = 9,
+
+        VFlag = 28,
+        CFlag = 29,
+        ZFlag = 30,
+        NFlag = 31
+    }
+}
diff --git a/ARMeilleure/State/RegisterAlias.cs b/ARMeilleure/State/RegisterAlias.cs
new file mode 100644
index 000000000..ae0d45628
--- /dev/null
+++ b/ARMeilleure/State/RegisterAlias.cs
@@ -0,0 +1,41 @@
+namespace ARMeilleure.State
+{
+    static class RegisterAlias // Named register indices. NOTE(review): the suffixes presumably name AArch32 banked registers (Usr, Hyp, Irq, Svc, Abt, Und, Fiq) - confirm.
+    {
+        public const int R8Usr  = 8;
+        public const int R9Usr  = 9;
+        public const int R10Usr = 10;
+        public const int R11Usr = 11;
+        public const int R12Usr = 12;
+        public const int SpUsr  = 13;
+        public const int LrUsr  = 14;
+
+        public const int SpHyp = 15;
+
+        public const int LrIrq = 16;
+        public const int SpIrq = 17;
+
+        public const int LrSvc = 18;
+        public const int SpSvc = 19;
+
+        public const int LrAbt = 20;
+        public const int SpAbt = 21;
+
+        public const int LrUnd = 22;
+        public const int SpUnd = 23;
+
+        public const int R8Fiq  = 24;
+        public const int R9Fiq  = 25;
+        public const int R10Fiq = 26;
+        public const int R11Fiq = 27;
+        public const int R12Fiq = 28;
+        public const int SpFiq  = 29;
+        public const int LrFiq  = 30;
+
+        public const int Aarch32Lr = 14; // Same index as LrUsr.
+        public const int Aarch32Pc = 15; // Same index as SpHyp.
+
+        public const int Lr = 30; // Same index as LrFiq.
+        public const int Zr = 31; // Same value as RegisterConsts.ZeroIndex.
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/RegisterConsts.cs b/ARMeilleure/State/RegisterConsts.cs
new file mode 100644
index 000000000..a85117bb2
--- /dev/null
+++ b/ARMeilleure/State/RegisterConsts.cs
@@ -0,0 +1,13 @@
+namespace ARMeilleure.State
+{
+    static class RegisterConsts // Sizes of the register banks; NativeContext derives its memory layout from them.
+    {
+        public const int IntRegsCount       = 32;
+        public const int VecRegsCount       = 32;
+        public const int FlagsCount         = 32;
+        public const int IntAndVecRegsCount = IntRegsCount + VecRegsCount;
+        public const int TotalCount         = IntRegsCount + VecRegsCount + FlagsCount;
+
+        public const int ZeroIndex = 31; // NOTE(review): presumably the index of the zero register (same value as RegisterAlias.Zr) - confirm.
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/State/V128.cs b/ARMeilleure/State/V128.cs
new file mode 100644
index 000000000..eeb9ff1ca
--- /dev/null
+++ b/ARMeilleure/State/V128.cs
@@ -0,0 +1,257 @@
+using System;
+
+namespace ARMeilleure.State
+{
+    // 128-bit value stored as two 64-bit halves: _e0 holds bits 0-63 and _e1 holds bits 64-127.
+    // 32-bit elements 0 and 1 live in _e0, elements 2 and 3 in _e1, lower element in the lower bits.
+    public struct V128 : IEquatable<V128>
+    {
+        private ulong _e0;
+        private ulong _e1;
+
+        private static V128 _zero = new V128(0, 0);
+
+        public static V128 Zero => _zero;
+
+        // Scalar constructors: value becomes element 0 and every other element is zero.
+        public V128(float value) : this(value, 0, 0, 0) { }
+
+        public V128(double value) : this(value, 0) { }
+
+        public V128(float e0, float e1, float e2, float e3)
+        {
+            _e0  = (ulong)(uint)BitConverter.SingleToInt32Bits(e0) << 0;
+            _e0 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e1) << 32;
+            _e1  = (ulong)(uint)BitConverter.SingleToInt32Bits(e2) << 0;
+            _e1 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e3) << 32;
+        }
+
+        public V128(double e0, double e1)
+        {
+            _e0 = (ulong)BitConverter.DoubleToInt64Bits(e0);
+            _e1 = (ulong)BitConverter.DoubleToInt64Bits(e1);
+        }
+
+        public V128(int e0, int e1, int e2, int e3)
+        {
+            _e0  = (ulong)(uint)e0 << 0;
+            _e0 |= (ulong)(uint)e1 << 32;
+            _e1  = (ulong)(uint)e2 << 0;
+            _e1 |= (ulong)(uint)e3 << 32;
+        }
+
+        public V128(uint e0, uint e1, uint e2, uint e3)
+        {
+            _e0  = (ulong)e0 << 0;
+            _e0 |= (ulong)e1 << 32;
+            _e1  = (ulong)e2 << 0;
+            _e1 |= (ulong)e3 << 32;
+        }
+
+        public V128(long e0, long e1)
+        {
+            _e0 = (ulong)e0;
+            _e1 = (ulong)e1;
+        }
+
+        public V128(ulong e0, ulong e1)
+        {
+            _e0 = e0;
+            _e1 = e1;
+        }
+
+        // Reads the two halves from the first 16 bytes of data, in the byte order BitConverter uses.
+        public V128(byte[] data)
+        {
+            _e0 = (ulong)BitConverter.ToInt64(data, 0);
+            _e1 = (ulong)BitConverter.ToInt64(data, 8);
+        }
+
+        // Replaces 32-bit element index (0-3) in place; other bits are kept.
+        // Throws ArgumentOutOfRangeException for any other index.
+        public void Insert(int index, uint value)
+        {
+            switch (index)
+            {
+                case 0: _e0 = (_e0 & 0xffffffff00000000) | ((ulong)value << 0);  break;
+                case 1: _e0 = (_e0 & 0x00000000ffffffff) | ((ulong)value << 32); break;
+                case 2: _e1 = (_e1 & 0xffffffff00000000) | ((ulong)value << 0);  break;
+                case 3: _e1 = (_e1 & 0x00000000ffffffff) | ((ulong)value << 32); break;
+
+                default: throw new ArgumentOutOfRangeException(nameof(index));
+            }
+        }
+
+        // Replaces 64-bit element index (0-1) in place.
+        // Throws ArgumentOutOfRangeException for any other index.
+        public void Insert(int index, ulong value)
+        {
+            switch (index)
+            {
+                case 0: _e0 = value; break;
+                case 1: _e1 = value; break;
+
+                default: throw new ArgumentOutOfRangeException(nameof(index));
+            }
+        }
+
+        public float AsFloat()
+        {
+            return GetFloat(0);
+        }
+
+        public double AsDouble()
+        {
+            return GetDouble(0);
+        }
+
+        public float GetFloat(int index)
+        {
+            return BitConverter.Int32BitsToSingle(GetInt32(index));
+        }
+
+        public double GetDouble(int index)
+        {
+            return BitConverter.Int64BitsToDouble(GetInt64(index));
+        }
+
+        public int  GetInt32(int index) => (int)GetUInt32(index);
+        public long GetInt64(int index) => (long)GetUInt64(index);
+
+        // Returns 32-bit element index (0-3). Throws ArgumentOutOfRangeException otherwise.
+        public uint GetUInt32(int index)
+        {
+            switch (index)
+            {
+                case 0: return (uint)(_e0 >> 0);
+                case 1: return (uint)(_e0 >> 32);
+                case 2: return (uint)(_e1 >> 0);
+                case 3: return (uint)(_e1 >> 32);
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(index));
+        }
+
+        // Returns 64-bit element index (0-1). Throws ArgumentOutOfRangeException otherwise.
+        public ulong GetUInt64(int index)
+        {
+            switch (index)
+            {
+                case 0: return _e0;
+                case 1: return _e1;
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(index));
+        }
+
+        // Returns the 16 bytes of the value: those of _e0 followed by those of _e1.
+        public byte[] ToArray()
+        {
+            byte[] e0Data = BitConverter.GetBytes(_e0);
+            byte[] e1Data = BitConverter.GetBytes(_e1);
+
+            byte[] data = new byte[16];
+
+            Buffer.BlockCopy(e0Data, 0, data, 0, 8);
+            Buffer.BlockCopy(e1Data, 0, data, 8, 8);
+
+            return data;
+        }
+
+        public override int GetHashCode()
+        {
+            return HashCode.Combine(_e0, _e1);
+        }
+
+        public static V128 operator ~(V128 x)
+        {
+            return new V128(~x._e0, ~x._e1);
+        }
+
+        public static V128 operator &(V128 x, V128 y)
+        {
+            return new V128(x._e0 & y._e0, x._e1 & y._e1);
+        }
+
+        public static V128 operator |(V128 x, V128 y)
+        {
+            return new V128(x._e0 | y._e0, x._e1 | y._e1);
+        }
+
+        public static V128 operator ^(V128 x, V128 y)
+        {
+            return new V128(x._e0 ^ y._e0, x._e1 ^ y._e1);
+        }
+
+        // Shifts the whole 128-bit value left. Only the low 7 bits of shift are used, the same way
+        // the built-in 64-bit shift uses only the low 6 bits of its count.
+        public static V128 operator <<(V128 x, int shift)
+        {
+            shift &= 127;
+
+            // A count of 0 must not reach the general case: 64 - 0 wraps to a shift by 0 there.
+            if (shift == 0)
+            {
+                return x;
+            }
+
+            // 64 or more: the low half moves entirely into the high half.
+            if (shift >= 64)
+            {
+                return new V128(0UL, x._e0 << (shift - 64));
+            }
+
+            ulong shiftOut = x._e0 >> (64 - shift);
+
+            return new V128(x._e0 << shift, (x._e1 << shift) | shiftOut);
+        }
+
+        // Logical (zero-filling) right shift of the whole 128-bit value. Only the low 7 bits of
+        // shift are used.
+        public static V128 operator >>(V128 x, int shift)
+        {
+            shift &= 127;
+
+            // Same special cases as the left shift, mirrored.
+            if (shift == 0)
+            {
+                return x;
+            }
+
+            if (shift >= 64)
+            {
+                return new V128(x._e1 >> (shift - 64), 0UL);
+            }
+
+            ulong shiftOut = x._e1 << (64 - shift);
+
+            return new V128((x._e0 >> shift) | shiftOut, x._e1 >> shift);
+        }
+
+        public static bool operator ==(V128 x, V128 y)
+        {
+            return x.Equals(y);
+        }
+
+        public static bool operator !=(V128 x, V128 y)
+        {
+            return !x.Equals(y);
+        }
+
+        public override bool Equals(object obj)
+        {
+            return obj is V128 vector && Equals(vector);
+        }
+
+        public bool Equals(V128 other)
+        {
+            return other._e0 == _e0 && other._e1 == _e1;
+        }
+
+        // Formats the value as 32 hexadecimal digits, high half first, prefixed with "0x".
+        public override string ToString()
+        {
+            return $"0x{_e1:X16}{_e0:X16}";
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Statistics.cs b/ARMeilleure/Statistics.cs
new file mode 100644
index 000000000..e80ee59d6
--- /dev/null
+++ b/ARMeilleure/Statistics.cs
@@ -0,0 +1,92 @@
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+
+namespace ARMeilleure
+{
+    // Accumulates, per function address, the stopwatch ticks measured between StartTimer and
+    // StopTimer calls, and renders them as a text report.
+    //
+    // All timer methods compile to empty bodies unless the M_PROFILE symbol is defined.
+    public static class Statistics
+    {
+        // Maximum number of rows emitted by GetReport.
+        private const int ReportMaxFunctions = 100;
+
+        // One stopwatch per thread; it is only created by InitializeTimer, so that method has to
+        // be called on a thread before any of the other timer methods are used there.
+        [ThreadStatic]
+        private static Stopwatch _executionTimer;
+
+        // Total elapsed ticks keyed by function address, shared by all threads.
+        private static readonly ConcurrentDictionary<ulong, long> _ticksPerFunction =
+            new ConcurrentDictionary<ulong, long>();
+
+        // Creates the calling thread's stopwatch if it does not exist yet.
+        public static void InitializeTimer()
+        {
+#if M_PROFILE
+            if (_executionTimer == null)
+            {
+                _executionTimer = new Stopwatch();
+            }
+#endif
+        }
+
+        // Resets the calling thread's stopwatch and starts measuring from zero.
+        internal static void StartTimer()
+        {
+#if M_PROFILE
+            _executionTimer.Restart();
+#endif
+        }
+
+        // Stops the calling thread's stopwatch and adds its elapsed ticks to the total
+        // recorded for funcAddr.
+        internal static void StopTimer(ulong funcAddr)
+        {
+#if M_PROFILE
+            _executionTimer.Stop();
+
+            long ticks = _executionTimer.ElapsedTicks;
+
+            _ticksPerFunction.AddOrUpdate(funcAddr, ticks, (key, oldTicks) => oldTicks + ticks);
+#endif
+        }
+
+        // Continues measuring without resetting the elapsed time.
+        internal static void ResumeTimer()
+        {
+#if M_PROFILE
+            _executionTimer.Start();
+#endif
+        }
+
+        // Suspends measuring; the elapsed time so far is kept.
+        internal static void PauseTimer()
+        {
+#if M_PROFILE
+            _executionTimer.Stop();
+#endif
+        }
+
+        // Builds a table of the functions with the highest accumulated time, in descending
+        // order, with the time converted from stopwatch ticks to milliseconds.
+        // At most ReportMaxFunctions rows are written.
+        public static string GetReport()
+        {
+            StringBuilder sb = new StringBuilder();
+
+            sb.AppendLine(" Function address   | Time");
+            sb.AppendLine("--------------------------");
+
+            // Snapshot first, so that sorting does not race with concurrent updates.
+            KeyValuePair<ulong, long>[] funcTable = _ticksPerFunction.ToArray();
+
+            // Take() enforces the row limit exactly; the previous post-increment counter check
+            // let one extra row through.
+            foreach (KeyValuePair<ulong, long> kv in funcTable.OrderByDescending(x => x.Value).Take(ReportMaxFunctions))
+            {
+                long timeInMs = (kv.Value * 1000) / Stopwatch.Frequency;
+
+                sb.AppendLine($" 0x{kv.Key:X16} | {timeInMs} ms");
+            }
+
+            return sb.ToString();
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/ArmEmitterContext.cs b/ARMeilleure/Translation/ArmEmitterContext.cs
new file mode 100644
index 000000000..d35e985e6
--- /dev/null
+++ b/ARMeilleure/Translation/ArmEmitterContext.cs
@@ -0,0 +1,153 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.Instructions;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using System.Collections.Generic;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+    // Emitter context used while translating ARM code. On top of the base context it keeps
+    // one label per guest address and remembers the operands of the last comparison, so that a
+    // following conditional can be emitted as a direct compare when the flags are still valid.
+    class ArmEmitterContext : EmitterContext
+    {
+        // Labels already created, keyed by address.
+        private Dictionary<ulong, Operand> _labels;
+
+        // Instruction that last recorded a comparison, and instruction that last set a flag
+        // tracked by MarkFlagSet. The shortcut is only valid when both are the same.
+        private OpCode _optOpLastCompare;
+        private OpCode _optOpLastFlagSet;
+
+        // Copies of the two compared operands, taken at the time of the comparison.
+        private Operand _optCmpTempN;
+        private Operand _optCmpTempM;
+
+        private Block _currBlock;
+
+        // Block currently being translated. Assigning it discards the tracked comparison state.
+        public Block CurrBlock
+        {
+            get => _currBlock;
+            set
+            {
+                _currBlock = value;
+
+                _optOpLastCompare = null;
+                _optOpLastFlagSet = null;
+            }
+        }
+
+        public OpCode CurrOp { get; set; }
+
+        public MemoryManager Memory { get; }
+
+        public Aarch32Mode Mode { get; }
+
+        public ArmEmitterContext(MemoryManager memory, Aarch32Mode mode)
+        {
+            _labels = new Dictionary<ulong, Operand>();
+
+            Mode   = mode;
+            Memory = memory;
+        }
+
+        // Returns the label associated with the address, creating it on first request.
+        public Operand GetLabel(ulong address)
+        {
+            if (_labels.TryGetValue(address, out Operand existing))
+            {
+                return existing;
+            }
+
+            Operand created = Label();
+
+            _labels.Add(address, created);
+
+            return created;
+        }
+
+        // Records the current instruction as the last comparison, keeping copies of its operands.
+        public void MarkComparison(Operand n, Operand m)
+        {
+            _optOpLastCompare = CurrOp;
+
+            _optCmpTempN = Copy(n);
+            _optCmpTempM = Copy(m);
+        }
+
+        // Records the current instruction as the last one that modified a flag.
+        public void MarkFlagSet(PState stateFlag)
+        {
+            // Only the NZCV flag bits are of interest here. Tracking them ensures that, when a
+            // direct branch is emitted for a compare + branch sequence, the comparison operands
+            // of an older instruction are not used after another instruction further along has
+            // already overwritten the flags.
+            if (stateFlag < PState.VFlag)
+            {
+                return;
+            }
+
+            _optOpLastFlagSet = CurrOp;
+        }
+
+        // Tries to express the condition as a direct comparison of the operands recorded by
+        // MarkComparison. Returns null when no such comparison is available or applicable.
+        public Operand TryGetComparisonResult(Condition condition)
+        {
+            OpCode lastCompare = _optOpLastCompare;
+
+            if (lastCompare == null || lastCompare != _optOpLastFlagSet)
+            {
+                return null;
+            }
+
+            Operand lhs = _optCmpTempN;
+            Operand rhs = _optCmpTempM;
+
+            InstName cmpName = lastCompare.Instruction.Name;
+
+            if (cmpName == InstName.Subs)
+            {
+                switch (condition)
+                {
+                    case Condition.Eq:   return ICompareEqual           (lhs, rhs);
+                    case Condition.Ne:   return ICompareNotEqual        (lhs, rhs);
+                    case Condition.GeUn: return ICompareGreaterOrEqualUI(lhs, rhs);
+                    case Condition.LtUn: return ICompareLessUI          (lhs, rhs);
+                    case Condition.GtUn: return ICompareGreaterUI       (lhs, rhs);
+                    case Condition.LeUn: return ICompareLessOrEqualUI   (lhs, rhs);
+                    case Condition.Ge:   return ICompareGreaterOrEqual  (lhs, rhs);
+                    case Condition.Lt:   return ICompareLess            (lhs, rhs);
+                    case Condition.Gt:   return ICompareGreater         (lhs, rhs);
+                    case Condition.Le:   return ICompareLessOrEqual     (lhs, rhs);
+                }
+
+                return null;
+            }
+
+            if (cmpName != InstName.Adds || !(lastCompare is IOpCodeAluImm immOp))
+            {
+                return null;
+            }
+
+            // CMN comparisons come with several limitations:
+            // - Unsigned comparisons are not valid, because they depend on the carry flag, which
+            // has a different meaning for addition (carry) and subtraction (borrow).
+            // So no unsigned compare may be produced for the CMN case.
+            // - Only the immediate variants can be optimized, because a second operand equal to
+            // INT_MIN can't be negated, as there's no positive counterpart.
+            // Such values can't be encoded on the immediate encodings.
+            rhs = immOp.RegisterSize == RegisterSize.Int32
+                ? Const((int)-immOp.Immediate)
+                : Const(-immOp.Immediate);
+
+            switch (condition)
+            {
+                case Condition.Eq: return ICompareEqual         (lhs, rhs);
+                case Condition.Ne: return ICompareNotEqual      (lhs, rhs);
+                case Condition.Ge: return ICompareGreaterOrEqual(lhs, rhs);
+                case Condition.Lt: return ICompareLess          (lhs, rhs);
+                case Condition.Gt: return ICompareGreater       (lhs, rhs);
+                case Condition.Le: return ICompareLessOrEqual   (lhs, rhs);
+            }
+
+            return null;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/Compiler.cs b/ARMeilleure/Translation/Compiler.cs
new file mode 100644
index 000000000..4075a7f06
--- /dev/null
+++ b/ARMeilleure/Translation/Compiler.cs
@@ -0,0 +1,47 @@
+using ARMeilleure.CodeGen;
+using ARMeilleure.CodeGen.X86;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Translation
+{
+    static class Compiler
+    {
+        // Runs the compilation pipeline over the control flow graph (dominance analysis,
+        // SSA construction or register-to-local renaming, code generation), maps the generated
+        // code through the JIT cache and returns it wrapped in a delegate of type T.
+        public static T Compile<T>(
+            ControlFlowGraph cfg,
+            OperandType[]    funcArgTypes,
+            OperandType      funcReturnType,
+            CompilerOptions  options)
+        {
+            // Dominance information is always computed, whichever renaming strategy follows.
+            Logger.StartPass(PassName.Dominance);
+            Dominance.FindDominators(cfg);
+            Dominance.FindDominanceFrontiers(cfg);
+            Logger.EndPass(PassName.Dominance);
+
+            bool useSsa = (options & CompilerOptions.SsaForm) == CompilerOptions.SsaForm;
+
+            Logger.StartPass(PassName.SsaConstruction);
+
+            if (!useSsa)
+            {
+                RegisterToLocal.Rename(cfg);
+            }
+            else
+            {
+                Ssa.Construct(cfg);
+            }
+
+            Logger.EndPass(PassName.SsaConstruction, cfg);
+
+            CompiledFunction compiled = CodeGenerator.Generate(
+                new CompilerContext(cfg, funcArgTypes, funcReturnType, options));
+
+            IntPtr entryPoint = JitCache.Map(compiled);
+
+            return Marshal.GetDelegateForFunctionPointer<T>(entryPoint);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/CompilerContext.cs b/ARMeilleure/Translation/CompilerContext.cs
new file mode 100644
index 000000000..cfe5ad1e5
--- /dev/null
+++ b/ARMeilleure/Translation/CompilerContext.cs
@@ -0,0 +1,26 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.Translation
+{
+    // Immutable bundle of everything describing one function to compile: its control flow
+    // graph, its signature and the compiler options to apply.
+    struct CompilerContext
+    {
+        public CompilerContext(
+            ControlFlowGraph cfg,
+            OperandType[]    funcArgTypes,
+            OperandType      funcReturnType,
+            CompilerOptions  options)
+        {
+            Options        = options;
+            FuncReturnType = funcReturnType;
+            FuncArgTypes   = funcArgTypes;
+            Cfg            = cfg;
+        }
+
+        // Control flow graph of the function.
+        public ControlFlowGraph Cfg { get; }
+
+        // Argument types, and the return type, of the function.
+        public OperandType[] FuncArgTypes   { get; }
+        public OperandType   FuncReturnType { get; }
+
+        // Flags selecting the compiler features to use.
+        public CompilerOptions Options { get; }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/CompilerOptions.cs b/ARMeilleure/Translation/CompilerOptions.cs
new file mode 100644
index 000000000..53998ec6f
--- /dev/null
+++ b/ARMeilleure/Translation/CompilerOptions.cs
@@ -0,0 +1,16 @@
+using System;
+
+namespace ARMeilleure.Translation
+{
+    // Bit flags selecting compiler features, plus two named combinations of them.
+    [Flags]
+    enum CompilerOptions
+    {
+        None     = 0x0,
+        SsaForm  = 0x1,
+        Optimize = 0x2,
+        Lsra     = 0x4,
+
+        // Presets: MediumCq is SsaForm + Optimize, HighCq additionally enables Lsra.
+        MediumCq = Optimize | SsaForm,
+        HighCq   = MediumCq | Lsra
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/ControlFlowGraph.cs b/ARMeilleure/Translation/ControlFlowGraph.cs
new file mode 100644
index 000000000..758f1f968
--- /dev/null
+++ b/ARMeilleure/Translation/ControlFlowGraph.cs
@@ -0,0 +1,158 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace ARMeilleure.Translation
+{
+    // Control flow graph of a function: the entry block, the list of all blocks, and a
+    // post-order numbering of the blocks computed once at construction time.
+    class ControlFlowGraph
+    {
+        public BasicBlock Entry { get; }
+
+        public LinkedList<BasicBlock> Blocks { get; }
+
+        // Blocks in post-order, as computed by the constructor.
+        public BasicBlock[] PostOrderBlocks { get; }
+
+        // Maps a block's Index to its position in PostOrderBlocks.
+        public int[] PostOrderMap { get; }
+
+        // Builds the graph: unreachable blocks are removed from the list first, then the
+        // post-order arrays are filled with an iterative depth-first traversal from the entry.
+        public ControlFlowGraph(BasicBlock entry, LinkedList<BasicBlock> blocks)
+        {
+            Entry  = entry;
+            Blocks = blocks;
+
+            RemoveUnreachableBlocks(blocks);
+
+            HashSet<BasicBlock> visited = new HashSet<BasicBlock>();
+
+            Stack<BasicBlock> blockStack = new Stack<BasicBlock>();
+
+            // Sized after the removal above, so block indices must lie in [0, blocks.Count).
+            PostOrderBlocks = new BasicBlock[blocks.Count];
+
+            PostOrderMap = new int[blocks.Count];
+
+            visited.Add(entry);
+
+            blockStack.Push(entry);
+
+            int index = 0;
+
+            while (blockStack.TryPop(out BasicBlock block))
+            {
+                // A block with an unvisited successor is pushed back and revisited after that
+                // successor; "Next" is explored before "Branch". A block is only numbered once
+                // neither successor is left unvisited, which yields the post-order.
+                if (block.Next != null && visited.Add(block.Next))
+                {
+                    blockStack.Push(block);
+                    blockStack.Push(block.Next);
+                }
+                else if (block.Branch != null && visited.Add(block.Branch))
+                {
+                    blockStack.Push(block);
+                    blockStack.Push(block.Branch);
+                }
+                else
+                {
+                    PostOrderMap[block.Index] = index;
+
+                    PostOrderBlocks[index++] = block;
+                }
+            }
+        }
+
+        // Removes from the list every block that can't be reached from Entry through
+        // Next/Branch links, and renumbers the remaining blocks.
+        private void RemoveUnreachableBlocks(LinkedList<BasicBlock> blocks)
+        {
+            HashSet<BasicBlock> visited = new HashSet<BasicBlock>();
+
+            Queue<BasicBlock> workQueue = new Queue<BasicBlock>();
+
+            visited.Add(Entry);
+
+            workQueue.Enqueue(Entry);
+
+            // Breadth-first walk collecting all reachable blocks.
+            while (workQueue.TryDequeue(out BasicBlock block))
+            {
+                Debug.Assert(block.Index != -1, "Invalid block index.");
+
+                if (block.Next != null && visited.Add(block.Next))
+                {
+                    workQueue.Enqueue(block.Next);
+                }
+
+                if (block.Branch != null && visited.Add(block.Branch))
+                {
+                    workQueue.Enqueue(block.Branch);
+                }
+            }
+
+            // NOTE(review): indices are only rewritten when something is removed; otherwise the
+            // existing Index values are presumably already sequential in list order - confirm.
+            if (visited.Count < blocks.Count)
+            {
+                // Remove unreachable blocks and renumber.
+                int index = 0;
+
+                for (LinkedListNode<BasicBlock> node = blocks.First; node != null;)
+                {
+                    // Fetched before a possible removal of the current node.
+                    LinkedListNode<BasicBlock> nextNode = node.Next;
+
+                    BasicBlock block = node.Value;
+
+                    if (!visited.Contains(block))
+                    {
+                        // Clear the outgoing links of the removed block
+                        // (presumably this also detaches it from its successors' predecessor lists - confirm).
+                        block.Next   = null;
+                        block.Branch = null;
+
+                        blocks.Remove(node);
+                    }
+                    else
+                    {
+                        block.Index = index++;
+                    }
+
+                    node = nextNode;
+                }
+            }
+        }
+
+        // Inserts a new block on the edge between "predecessor" and "successor" and returns it.
+        // Throws ArgumentException when the new block ends up with no predecessors, which is
+        // reported as the two blocks not being connected.
+        // Note that PostOrderBlocks and PostOrderMap are not updated by this method.
+        public BasicBlock SplitEdge(BasicBlock predecessor, BasicBlock successor)
+        {
+            BasicBlock splitBlock = new BasicBlock(Blocks.Count);
+
+            // Redirect whichever links of the predecessor pointed at the successor.
+            if (predecessor.Next == successor)
+            {
+                predecessor.Next = splitBlock;
+            }
+
+            if (predecessor.Branch == successor)
+            {
+                predecessor.Branch = splitBlock;
+            }
+
+            // Relies on the assignments above having registered a predecessor on splitBlock
+            // (presumably done by the Next/Branch setters - confirm against BasicBlock).
+            if (splitBlock.Predecessors.Count == 0)
+            {
+                throw new ArgumentException("Predecessor and successor are not connected.");
+            }
+
+            // Insert the new block on the list of blocks.
+            BasicBlock succPrev = successor.Node.Previous?.Value;
+
+            if (succPrev != null && succPrev != predecessor && succPrev.Next == successor)
+            {
+                // Can't insert after the predecessor or before the successor.
+                // Here, we insert it before the successor by also splitting another
+                // edge (the one between the block before "successor" and "successor").
+                BasicBlock splitBlock2 = new BasicBlock(splitBlock.Index + 1);
+
+                succPrev.Next = splitBlock2;
+
+                splitBlock2.Branch = successor;
+
+                splitBlock2.Operations.AddLast(new Operation(Instruction.Branch, null));
+
+                Blocks.AddBefore(successor.Node, splitBlock2);
+            }
+
+            splitBlock.Next = successor;
+
+            // NOTE(review): the list nodes created by AddBefore are not stored on the new
+            // blocks here, while "successor.Node" is read above - confirm whether the new
+            // blocks' Node needs to be assigned.
+            Blocks.AddBefore(successor.Node, splitBlock);
+
+            return splitBlock;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/DelegateCache.cs b/ARMeilleure/Translation/DelegateCache.cs
new file mode 100644
index 000000000..7328c61a6
--- /dev/null
+++ b/ARMeilleure/Translation/DelegateCache.cs
@@ -0,0 +1,26 @@
+using System;
+using System.Collections.Concurrent;
+using System.Reflection;
+
+namespace ARMeilleure.Translation
+{
+    // Process-wide cache that holds a reference to one delegate per target method, so that the
+    // delegate stays reachable for as long as the cache exists.
+    static class DelegateCache
+    {
+        private static readonly ConcurrentDictionary<string, Delegate> _delegates =
+            new ConcurrentDictionary<string, Delegate>();
+
+        // Returns the cached delegate for the method "dlg" points to, storing "dlg" itself
+        // when that method has not been seen before.
+        // Throws ArgumentNullException when "dlg" is null.
+        //
+        // NOTE(review): the key is derived from the method only, so delegates bound to
+        // different target objects of the same method share a single entry.
+        public static Delegate GetOrAdd(Delegate dlg)
+        {
+            if (dlg == null)
+            {
+                throw new ArgumentNullException(nameof(dlg));
+            }
+
+            return _delegates.GetOrAdd(GetKey(dlg.Method), dlg);
+        }
+
+        // Builds the cache key from the declaring type and the full method signature.
+        // The type and method name alone are not enough: overloads would map to the same key,
+        // and the cache would hand back a delegate for a different overload.
+        private static string GetKey(MethodInfo info)
+        {
+            // MethodInfo.ToString() contains the return type, name, generic arguments and
+            // parameter types. DeclaringType can be null (e.g. for dynamic methods).
+            return $"{info.DeclaringType?.FullName}.{info}";
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/Dominance.cs b/ARMeilleure/Translation/Dominance.cs
new file mode 100644
index 000000000..bb55169ed
--- /dev/null
+++ b/ARMeilleure/Translation/Dominance.cs
@@ -0,0 +1,95 @@
+using ARMeilleure.IntermediateRepresentation;
+using System.Diagnostics;
+
+namespace ARMeilleure.Translation
+{
+    static class Dominance
+    {
+        // Both methods implement the algorithms described in "A Simple, Fast Dominance Algorithm".
+        // https://www.cs.rice.edu/~keith/EMBED/dom.pdf
+
+        // Computes the immediate dominator of every block in the graph's post-order array,
+        // iterating until no immediate dominator changes anymore.
+        public static void FindDominators(ControlFlowGraph cfg)
+        {
+            int[] order = cfg.PostOrderMap;
+
+            BasicBlock[] postOrder = cfg.PostOrderBlocks;
+
+            // Walks both blocks up their dominator chains until they meet.
+            BasicBlock Intersect(BasicBlock a, BasicBlock b)
+            {
+                while (a != b)
+                {
+                    while (order[a.Index] < order[b.Index])
+                    {
+                        a = a.ImmediateDominator;
+                    }
+
+                    while (order[b.Index] < order[a.Index])
+                    {
+                        b = b.ImmediateDominator;
+                    }
+                }
+
+                return a;
+            }
+
+            BasicBlock entry = cfg.Entry;
+
+            entry.ImmediateDominator = entry;
+
+            Debug.Assert(entry == postOrder[postOrder.Length - 1]);
+
+            bool changed = true;
+
+            while (changed)
+            {
+                changed = false;
+
+                // Reverse post-order, skipping the entry block (last element).
+                for (int i = postOrder.Length - 2; i >= 0; i--)
+                {
+                    BasicBlock current = postOrder[i];
+
+                    BasicBlock candidate = null;
+
+                    foreach (BasicBlock pred in current.Predecessors)
+                    {
+                        // Predecessors without a dominator yet do not take part.
+                        if (pred.ImmediateDominator == null)
+                        {
+                            continue;
+                        }
+
+                        candidate = candidate == null ? pred : Intersect(pred, candidate);
+                    }
+
+                    if (current.ImmediateDominator != candidate)
+                    {
+                        current.ImmediateDominator = candidate;
+
+                        changed = true;
+                    }
+                }
+            }
+        }
+
+        // Fills the DominanceFrontiers set of each block. Requires the immediate dominators
+        // to be available (see FindDominators).
+        public static void FindDominanceFrontiers(ControlFlowGraph cfg)
+        {
+            foreach (BasicBlock join in cfg.Blocks)
+            {
+                // Only blocks with two or more predecessors are considered.
+                if (join.Predecessors.Count >= 2)
+                {
+                    foreach (BasicBlock pred in join.Predecessors)
+                    {
+                        // Every block on the way from the predecessor up to (but excluding)
+                        // the immediate dominator of "join" has "join" on its frontier.
+                        for (BasicBlock runner = pred;
+                             runner != join.ImmediateDominator;
+                             runner = runner.ImmediateDominator)
+                        {
+                            runner.DominanceFrontiers.Add(join);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/EmitterContext.cs b/ARMeilleure/Translation/EmitterContext.cs
new file mode 100644
index 000000000..13cf677c7
--- /dev/null
+++ b/ARMeilleure/Translation/EmitterContext.cs
@@ -0,0 +1,562 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+    class EmitterContext
+    {
+        private Dictionary<Operand, BasicBlock> _irLabels;
+
+        private LinkedList<BasicBlock> _irBlocks;
+
+        private BasicBlock _irBlock;
+
+        private bool _needsNewBlock;
+
+        public EmitterContext()
+        {
+            _irLabels = new Dictionary<Operand, BasicBlock>();
+
+            _irBlocks = new LinkedList<BasicBlock>();
+
+            _needsNewBlock = true;
+        }
+
+        public Operand Add(Operand op1, Operand op2)
+        {
+            return Add(Instruction.Add, Local(op1.Type), op1, op2);
+        }
+
+        public Operand BitwiseAnd(Operand op1, Operand op2)
+        {
+            return Add(Instruction.BitwiseAnd, Local(op1.Type), op1, op2);
+        }
+
+        public Operand BitwiseExclusiveOr(Operand op1, Operand op2)
+        {
+            return Add(Instruction.BitwiseExclusiveOr, Local(op1.Type), op1, op2);
+        }
+
+        public Operand BitwiseNot(Operand op1)
+        {
+            return Add(Instruction.BitwiseNot, Local(op1.Type), op1);
+        }
+
+        public Operand BitwiseOr(Operand op1, Operand op2)
+        {
+            return Add(Instruction.BitwiseOr, Local(op1.Type), op1, op2);
+        }
+
+        public void Branch(Operand label)
+        {
+            Add(Instruction.Branch, null);
+
+            BranchToLabel(label);
+        }
+
+        public void BranchIfFalse(Operand label, Operand op1)
+        {
+            Add(Instruction.BranchIfFalse, null, op1);
+
+            BranchToLabel(label);
+        }
+
+        public void BranchIfTrue(Operand label, Operand op1)
+        {
+            Add(Instruction.BranchIfTrue, null, op1);
+
+            BranchToLabel(label);
+        }
+
+        public Operand ByteSwap(Operand op1)
+        {
+            return Add(Instruction.ByteSwap, Local(op1.Type), op1);
+        }
+
+        // Emits a call to the managed method behind "func". The return type of the call is
+        // derived from the method's return type.
+        public Operand Call(Delegate func, params Operand[] callArgs)
+        {
+            // Go through the cache, which keeps a reference to the delegate so that it
+            // will not be garbage collected.
+            Delegate cached = DelegateCache.GetOrAdd(func);
+
+            func = cached;
+
+            OperandType returnType = GetOperandType(func.Method.ReturnType);
+
+            long address = Marshal.GetFunctionPointerForDelegate<Delegate>(func).ToInt64();
+
+            return Call(Const(address), returnType, callArgs);
+        }
+
+        private static Dictionary<TypeCode, OperandType> _typeCodeToOperandTypeMap =
+                   new Dictionary<TypeCode, OperandType>()
+        {
+            { TypeCode.Boolean, OperandType.I32  },
+            { TypeCode.Byte,    OperandType.I32  },
+            { TypeCode.Char,    OperandType.I32  },
+            { TypeCode.Double,  OperandType.FP64 },
+            { TypeCode.Int16,   OperandType.I32  },
+            { TypeCode.Int32,   OperandType.I32  },
+            { TypeCode.Int64,   OperandType.I64  },
+            { TypeCode.SByte,   OperandType.I32  },
+            { TypeCode.Single,  OperandType.FP32 },
+            { TypeCode.UInt16,  OperandType.I32  },
+            { TypeCode.UInt32,  OperandType.I32  },
+            { TypeCode.UInt64,  OperandType.I64  }
+        };
+
+        // Maps a managed type to the operand type used to represent it: primitive types go
+        // through the type code table, V128 and void are handled separately.
+        // Throws ArgumentException for any other type.
+        private static OperandType GetOperandType(Type type)
+        {
+            TypeCode typeCode = Type.GetTypeCode(type);
+
+            if (_typeCodeToOperandTypeMap.TryGetValue(typeCode, out OperandType mapped))
+            {
+                return mapped;
+            }
+
+            if (type == typeof(V128))
+            {
+                return OperandType.V128;
+            }
+
+            if (type == typeof(void))
+            {
+                return OperandType.None;
+            }
+
+            throw new ArgumentException($"Invalid type \"{type.Name}\".");
+        }
+
+        // Emits a call instruction whose first source is the target address, followed by the
+        // call arguments. A destination is only created when the call returns a value.
+        public Operand Call(Operand address, OperandType returnType, params Operand[] callArgs)
+        {
+            Operand[] sources = new Operand[1 + callArgs.Length];
+
+            sources[0] = address;
+
+            callArgs.CopyTo(sources, 1);
+
+            if (returnType == OperandType.None)
+            {
+                return Add(Instruction.Call, null, sources);
+            }
+
+            return Add(Instruction.Call, Local(returnType), sources);
+        }
+
+        public Operand CompareAndSwap128(Operand address, Operand expected, Operand desired)
+        {
+            return Add(Instruction.CompareAndSwap128, Local(OperandType.V128), address, expected, desired);
+        }
+
+        public Operand ConditionalSelect(Operand op1, Operand op2, Operand op3)
+        {
+            return Add(Instruction.ConditionalSelect, Local(op2.Type), op1, op2, op3);
+        }
+
+        // Emits a conversion of an I64 operand into a new I32 local.
+        // Throws ArgumentException when the operand is not of type I64.
+        public Operand ConvertI64ToI32(Operand op1)
+        {
+            if (op1.Type == OperandType.I64)
+            {
+                return Add(Instruction.ConvertI64ToI32, Local(OperandType.I32), op1);
+            }
+
+            throw new ArgumentException($"Invalid operand type \"{op1.Type}\".");
+        }
+
+        public Operand ConvertToFP(OperandType type, Operand op1)
+        {
+            return Add(Instruction.ConvertToFP, Local(type), op1);
+        }
+
+        public Operand ConvertToFPUI(OperandType type, Operand op1)
+        {
+            return Add(Instruction.ConvertToFPUI, Local(type), op1);
+        }
+
+        public Operand Copy(Operand op1)
+        {
+            return Add(Instruction.Copy, Local(op1.Type), op1);
+        }
+
+        // Emits a copy of "op1" into an existing destination, which must be a register operand.
+        // Throws ArgumentException for any other destination kind.
+        public Operand Copy(Operand dest, Operand op1)
+        {
+            if (dest.Kind == OperandKind.Register)
+            {
+                return Add(Instruction.Copy, dest, op1);
+            }
+
+            throw new ArgumentException($"Invalid dest operand kind \"{dest.Kind}\".");
+        }
+
+        public Operand CountLeadingZeros(Operand op1)
+        {
+            return Add(Instruction.CountLeadingZeros, Local(op1.Type), op1);
+        }
+
+        internal Operand CpuId()
+        {
+            return Add(Instruction.CpuId, Local(OperandType.I64));
+        }
+
+        public Operand Divide(Operand op1, Operand op2)
+        {
+            return Add(Instruction.Divide, Local(op1.Type), op1, op2);
+        }
+
+        public Operand DivideUI(Operand op1, Operand op2)
+        {
+            return Add(Instruction.DivideUI, Local(op1.Type), op1, op2);
+        }
+
+        public Operand ICompareEqual(Operand op1, Operand op2)
+        {
+            return Add(Instruction.CompareEqual, Local(OperandType.I32), op1, op2);
+        }
+
+        public Operand ICompareGreater(Operand op1, Operand op2)
+        {
+            return Add(Instruction.CompareGreater, Local(OperandType.I32), op1, op2);
+        }
+
+        public Operand ICompareGreaterOrEqual(Operand op1, Operand op2)
+        {
+            return Add(Instruction.CompareGreaterOrEqual, Local(OperandType.I32), op1, op2);
+        }
+
+        public Operand ICompareGreaterOrEqualUI(Operand op1, Operand op2)
+        {
+            return Add(Instruction.CompareGreaterOrEqualUI, Local(OperandType.I32), op1, op2);
+        }
+
+        public Operand ICompareGreaterUI(Operand op1, Operand op2)
+        {
+            return Add(Instruction.CompareGreaterUI, Local(OperandType.I32), op1, op2);
+        }
+
+        public Operand ICompareLess(Operand op1, Operand op2)
+        {
+            return Add(Instruction.CompareLess, Local(OperandType.I32), op1, op2);
+        }
+
+        public Operand ICompareLessOrEqual(Operand op1, Operand op2)
+        {
+            return Add(Instruction.CompareLessOrEqual, Local(OperandType.I32), op1, op2);
+        }
+
+        public Operand ICompareLessOrEqualUI(Operand op1, Operand op2)
+        {
+            return Add(Instruction.CompareLessOrEqualUI, Local(OperandType.I32), op1, op2);
+        }
+
+        public Operand ICompareLessUI(Operand op1, Operand op2)
+        {
+            return Add(Instruction.CompareLessUI, Local(OperandType.I32), op1, op2);
+        }
+
+        public Operand ICompareNotEqual(Operand op1, Operand op2)
+        {
+            return Add(Instruction.CompareNotEqual, Local(OperandType.I32), op1, op2);
+        }
+
+        public Operand Load(OperandType type, Operand address)
+        {
+            return Add(Instruction.Load, Local(type), address);
+        }
+
+        public Operand Load16(Operand address)
+        {
+            return Add(Instruction.Load16, Local(OperandType.I32), address);
+        }
+
+        public Operand Load8(Operand address)
+        {
+            return Add(Instruction.Load8, Local(OperandType.I32), address);
+        }
+
+        public Operand LoadArgument(OperandType type, int index)
+        {
+            return Add(Instruction.LoadArgument, Local(type), Const(index));
+        }
+
+        public void LoadFromContext()
+        {
+            _needsNewBlock = true;
+
+            Add(Instruction.LoadFromContext);
+        }
+
+        public Operand Multiply(Operand op1, Operand op2)
+        {
+            return Add(Instruction.Multiply, Local(op1.Type), op1, op2);
+        }
+
+        public Operand Multiply64HighSI(Operand op1, Operand op2)
+        {
+            return Add(Instruction.Multiply64HighSI, Local(OperandType.I64), op1, op2);
+        }
+
+        public Operand Multiply64HighUI(Operand op1, Operand op2)
+        {
+            return Add(Instruction.Multiply64HighUI, Local(OperandType.I64), op1, op2);
+        }
+
+        public Operand Negate(Operand op1)
+        {
+            return Add(Instruction.Negate, Local(op1.Type), op1);
+        }
+
+        public void Return()
+        {
+            Add(Instruction.Return);
+
+            _needsNewBlock = true;
+        }
+
+        public void Return(Operand op1)
+        {
+            Add(Instruction.Return, null, op1);
+
+            _needsNewBlock = true;
+        }
+
+        public Operand RotateRight(Operand op1, Operand op2)
+        {
+            return Add(Instruction.RotateRight, Local(op1.Type), op1, op2);
+        }
+
+        public Operand ShiftLeft(Operand op1, Operand op2)
+        {
+            return Add(Instruction.ShiftLeft, Local(op1.Type), op1, op2);
+        }
+
+        public Operand ShiftRightSI(Operand op1, Operand op2)
+        {
+            return Add(Instruction.ShiftRightSI, Local(op1.Type), op1, op2);
+        }
+
+        public Operand ShiftRightUI(Operand op1, Operand op2)
+        {
+            return Add(Instruction.ShiftRightUI, Local(op1.Type), op1, op2);
+        }
+
+        public Operand SignExtend16(OperandType type, Operand op1)
+        {
+            return Add(Instruction.SignExtend16, Local(type), op1);
+        }
+
+        /// <summary>Appends a SignExtend32 operation on <paramref name="op1"/>; the destination is a local of the requested <paramref name="type"/>.</summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand SignExtend32(OperandType type, Operand op1)
+            => Add(Instruction.SignExtend32, Local(type), op1);
+
+        /// <summary>Appends a SignExtend8 operation on <paramref name="op1"/>; the destination is a local of the requested <paramref name="type"/>.</summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand SignExtend8(OperandType type, Operand op1)
+            => Add(Instruction.SignExtend8, Local(type), op1);
+
+        /// <summary>Appends a Store operation with no destination; sources are the address and the value, in that order.</summary>
+        public void Store(Operand address, Operand value)
+            => Add(Instruction.Store, null, address, value);
+
+        /// <summary>Appends a Store16 operation with no destination; sources are the address and the value, in that order.</summary>
+        public void Store16(Operand address, Operand value)
+            => Add(Instruction.Store16, null, address, value);
+
+        /// <summary>Appends a Store8 operation with no destination; sources are the address and the value, in that order.</summary>
+        public void Store8(Operand address, Operand value)
+            => Add(Instruction.Store8, null, address, value);
+
+        /// <summary>
+        /// Appends a StoreToContext operation without operands and ends the current block.
+        /// </summary>
+        public void StoreToContext()
+        {
+            Add(Instruction.StoreToContext);
+
+            // Set only after the Add so the operation stays the last one of this block;
+            // whatever is emitted next will open a new block.
+            _needsNewBlock = true;
+        }
+
+        /// <summary>Appends a Subtract operation whose destination is a local of <paramref name="op1"/>'s type.</summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand Subtract(Operand op1, Operand op2)
+            => Add(Instruction.Subtract, Local(op1.Type), op1, op2);
+
+        /// <summary>Appends a VectorCreateScalar operation on <paramref name="value"/>; the destination is a V128 local.</summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand VectorCreateScalar(Operand value)
+            => Add(Instruction.VectorCreateScalar, Local(OperandType.V128), value);
+
+        /// <summary>
+        /// Appends a VectorExtract operation; sources are the vector and <paramref name="index"/> as a constant,
+        /// and the destination is a local of the requested <paramref name="type"/>.
+        /// </summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand VectorExtract(OperandType type, Operand vector, int index)
+            => Add(Instruction.VectorExtract, Local(type), vector, Const(index));
+
+        /// <summary>
+        /// Appends a VectorExtract16 operation; sources are the vector and <paramref name="index"/> as a constant,
+        /// and the destination is an I32 local.
+        /// </summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand VectorExtract16(Operand vector, int index)
+            => Add(Instruction.VectorExtract16, Local(OperandType.I32), vector, Const(index));
+
+        /// <summary>
+        /// Appends a VectorExtract8 operation; sources are the vector and <paramref name="index"/> as a constant,
+        /// and the destination is an I32 local.
+        /// </summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand VectorExtract8(Operand vector, int index)
+            => Add(Instruction.VectorExtract8, Local(OperandType.I32), vector, Const(index));
+
+        /// <summary>
+        /// Appends a VectorInsert operation; sources are the vector, the value and <paramref name="index"/> as a constant,
+        /// and the destination is a V128 local.
+        /// </summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand VectorInsert(Operand vector, Operand value, int index)
+            => Add(Instruction.VectorInsert, Local(OperandType.V128), vector, value, Const(index));
+
+        /// <summary>
+        /// Appends a VectorInsert16 operation; sources are the vector, the value and <paramref name="index"/> as a constant,
+        /// and the destination is a V128 local.
+        /// </summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand VectorInsert16(Operand vector, Operand value, int index)
+            => Add(Instruction.VectorInsert16, Local(OperandType.V128), vector, value, Const(index));
+
+        /// <summary>
+        /// Appends a VectorInsert8 operation; sources are the vector, the value and <paramref name="index"/> as a constant,
+        /// and the destination is a V128 local.
+        /// </summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand VectorInsert8(Operand vector, Operand value, int index)
+            => Add(Instruction.VectorInsert8, Local(OperandType.V128), vector, value, Const(index));
+
+        /// <summary>Appends a VectorZero operation without sources; the destination is a V128 local.</summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand VectorZero()
+            => Add(Instruction.VectorZero, Local(OperandType.V128));
+
+        /// <summary>Appends a VectorZeroUpper64 operation on <paramref name="vector"/>; the destination is a V128 local.</summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand VectorZeroUpper64(Operand vector)
+            => Add(Instruction.VectorZeroUpper64, Local(OperandType.V128), vector);
+
+        /// <summary>Appends a VectorZeroUpper96 operation on <paramref name="vector"/>; the destination is a V128 local.</summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand VectorZeroUpper96(Operand vector)
+            => Add(Instruction.VectorZeroUpper96, Local(OperandType.V128), vector);
+
+        /// <summary>Appends a ZeroExtend16 operation on <paramref name="op1"/>; the destination is a local of the requested <paramref name="type"/>.</summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand ZeroExtend16(OperandType type, Operand op1)
+            => Add(Instruction.ZeroExtend16, Local(type), op1);
+
+        /// <summary>Appends a ZeroExtend32 operation on <paramref name="op1"/>; the destination is a local of the requested <paramref name="type"/>.</summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand ZeroExtend32(OperandType type, Operand op1)
+            => Add(Instruction.ZeroExtend32, Local(type), op1);
+
+        /// <summary>Appends a ZeroExtend8 operation on <paramref name="op1"/>; the destination is a local of the requested <paramref name="type"/>.</summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand ZeroExtend8(OperandType type, Operand op1)
+            => Add(Instruction.ZeroExtend8, Local(type), op1);
+
+        /// <summary>
+        /// Builds an <see cref="Operation"/> from the given instruction, destination and sources and
+        /// appends it to the end of the current block, opening a new block first when one is pending.
+        /// </summary>
+        /// <returns><paramref name="dest"/>, unchanged (null when the operation has no destination).</returns>
+        private Operand Add(Instruction inst, Operand dest = null, params Operand[] sources)
+        {
+            // A previous emit requested a block boundary; honour it before appending.
+            if (_needsNewBlock)
+            {
+                NewNextBlock();
+            }
+
+            _irBlock.Operations.AddLast(new Operation(inst, dest, sources));
+
+            return dest;
+        }
+
+        /// <summary>Appends an intrinsic operation over <paramref name="args"/>; the destination is a V128 local.</summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand AddIntrinsic(Intrinsic intrin, params Operand[] args)
+            => Add(intrin, Local(OperandType.V128), args);
+
+        /// <summary>Appends an intrinsic operation over <paramref name="args"/>; the destination is an I32 local.</summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand AddIntrinsicInt(Intrinsic intrin, params Operand[] args)
+            => Add(intrin, Local(OperandType.I32), args);
+
+        /// <summary>Appends an intrinsic operation over <paramref name="args"/>; the destination is an I64 local.</summary>
+        /// <returns>The destination operand of the appended operation.</returns>
+        public Operand AddIntrinsicLong(Intrinsic intrin, params Operand[] args)
+            => Add(intrin, Local(OperandType.I64), args);
+
+        /// <summary>
+        /// Builds an <see cref="IntrinsicOperation"/> from the given intrinsic, destination and sources and
+        /// appends it to the end of the current block, opening a new block first when one is pending.
+        /// </summary>
+        /// <returns><paramref name="dest"/>, unchanged.</returns>
+        private Operand Add(Intrinsic intrin, Operand dest, params Operand[] sources)
+        {
+            // A previous emit requested a block boundary; honour it before appending.
+            if (_needsNewBlock)
+            {
+                NewNextBlock();
+            }
+
+            _irBlock.Operations.AddLast(new IntrinsicOperation(intrin, dest, sources));
+
+            return dest;
+        }
+
+        /// <summary>
+        /// Makes the block registered for <paramref name="label"/> the branch target of the current block
+        /// and requests a new block for whatever is emitted next.
+        /// </summary>
+        private void BranchToLabel(Operand label)
+        {
+            if (!_irLabels.TryGetValue(label, out BasicBlock target))
+            {
+                // First reference to this label: register a placeholder block for it.
+                // MarkLabel gives it an index and inserts it into the block list later.
+                target = new BasicBlock();
+
+                _irLabels.Add(label, target);
+            }
+
+            _irBlock.Branch = target;
+            _needsNewBlock  = true;
+        }
+
+        /// <summary>
+        /// Binds <paramref name="label"/> to the current emit position by making the label's block current.
+        /// </summary>
+        public void MarkLabel(Operand label)
+        {
+            if (!_irLabels.TryGetValue(label, out BasicBlock labelBlock))
+            {
+                // Label not referenced by any branch yet: open a brand new block and register it.
+                NewNextBlock();
+
+                _irLabels.Add(label, _irBlock);
+
+                return;
+            }
+
+            // A block was already registered for this label (see BranchToLabel);
+            // give it its position in the block list and switch to it.
+            labelBlock.Index = _irBlocks.Count;
+            labelBlock.Node  = _irBlocks.AddLast(labelBlock);
+
+            NextBlock(labelBlock);
+        }
+
+        /// <summary>
+        /// Creates a block indexed by the current block count, appends it to the block list and makes it current.
+        /// </summary>
+        private void NewNextBlock()
+        {
+            // The index must be taken before the block is added to the list.
+            var created = new BasicBlock(_irBlocks.Count);
+
+            created.Node = _irBlocks.AddLast(created);
+
+            NextBlock(created);
+        }
+
+        /// <summary>
+        /// Makes <paramref name="nextBlock"/> the current block, linking it as the fall-through successor
+        /// of the previous block unless that block ends with an unconditional branch or a return.
+        /// </summary>
+        private void NextBlock(BasicBlock nextBlock)
+        {
+            bool fallsThrough = _irBlock != null && !EndsWithUnconditional(_irBlock);
+
+            if (fallsThrough)
+            {
+                _irBlock.Next = nextBlock;
+            }
+
+            _irBlock       = nextBlock;
+            _needsNewBlock = false;
+        }
+
+        /// <summary>
+        /// Tells whether the last node of <paramref name="block"/> is an <see cref="Operation"/>
+        /// whose instruction is Branch or Return.
+        /// </summary>
+        private static bool EndsWithUnconditional(BasicBlock block)
+        {
+            if (block.GetLastOp() is Operation lastOp)
+            {
+                switch (lastOp.Instruction)
+                {
+                    case Instruction.Branch:
+                    case Instruction.Return:
+                        return true;
+                }
+            }
+
+            // No last node, a node that is not an Operation, or any other instruction.
+            return false;
+        }
+
+        /// <summary>
+        /// Wraps the emitted blocks in a <see cref="ControlFlowGraph"/> whose entry is the first block of the list.
+        /// </summary>
+        public ControlFlowGraph GetControlFlowGraph()
+            => new ControlFlowGraph(_irBlocks.First.Value, _irBlocks);
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/GuestFunction.cs b/ARMeilleure/Translation/GuestFunction.cs
new file mode 100644
index 000000000..ac131a0d1
--- /dev/null
+++ b/ARMeilleure/Translation/GuestFunction.cs
@@ -0,0 +1,6 @@
+using System;
+
+namespace ARMeilleure.Translation
+{
+    // Delegate type taking a native context pointer and returning a 64-bit unsigned value.
+    // NOTE(review): presumably the entry-point signature of translated guest code, with the
+    // return value being the next guest address to run -- confirm against the translator.
+    delegate ulong GuestFunction(IntPtr nativeContextPtr);
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/ITranslator.cs b/ARMeilleure/Translation/ITranslator.cs
new file mode 100644
index 000000000..1063d3a65
--- /dev/null
+++ b/ARMeilleure/Translation/ITranslator.cs
@@ -0,0 +1,9 @@
+using ARMeilleure.State;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Abstraction over a component able to run guest code on an execution context.
+    /// </summary>
+    public interface ITranslator
+    {
+        /// <summary>
+        /// Runs code for <paramref name="context"/> starting from <paramref name="address"/>.
+        /// </summary>
+        /// <remarks>
+        /// NOTE(review): presumably <paramref name="address"/> is a guest virtual address and the
+        /// call blocks until execution stops -- confirm against the implementations.
+        /// </remarks>
+        void Execute(IExecutionContext context, ulong address);
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/JitCache.cs b/ARMeilleure/Translation/JitCache.cs
new file mode 100644
index 000000000..73f04a966
--- /dev/null
+++ b/ARMeilleure/Translation/JitCache.cs
@@ -0,0 +1,135 @@
+using ARMeilleure.CodeGen;
+using ARMeilleure.Memory;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Fixed-size region of memory into which compiled functions are copied, together with
+    /// the bookkeeping needed to find the function that owns a given offset of the region.
+    /// Space is handed out linearly and never released.
+    /// </summary>
+    static class JitCache
+    {
+        private const int PageSize = 4 * 1024;
+        private const int PageMask = PageSize - 1;
+
+        private const int CodeAlignment = 4; // Bytes
+
+        private const int CacheSize = 512 * 1024 * 1024;
+
+        private static readonly IntPtr _basePointer;
+
+        // Offset of the first free byte of the cache, relative to _basePointer.
+        private static int _offset;
+
+        private static readonly List<JitCacheEntry> _cacheEntries;
+
+        // Guards _offset and _cacheEntries.
+        private static readonly object _lock;
+
+        static JitCache()
+        {
+            _basePointer = MemoryManagement.Allocate(CacheSize);
+
+            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+            {
+                JitUnwindWindows.InstallFunctionTableHandler(_basePointer, CacheSize);
+
+                // The first page is used for the table based SEH structs.
+                _offset = PageSize;
+            }
+
+            _cacheEntries = new List<JitCacheEntry>();
+
+            _lock = new object();
+        }
+
+        /// <summary>
+        /// Copies the code of <paramref name="func"/> into the cache, adjusts the protection of the
+        /// pages it touches and records an entry describing it.
+        /// </summary>
+        /// <returns>Pointer to the first byte of the copied code.</returns>
+        /// <exception cref="OutOfMemoryException">The cache has no room left for the code.</exception>
+        public static IntPtr Map(CompiledFunction func)
+        {
+            byte[] code = func.Code;
+
+            lock (_lock)
+            {
+                int funcOffset = Allocate(code.Length);
+
+                IntPtr funcPtr = _basePointer + funcOffset;
+
+                Marshal.Copy(code, 0, funcPtr, code.Length);
+
+                ReprotectRange(funcOffset, code.Length);
+
+                Add(new JitCacheEntry(funcOffset, code.Length, func.UnwindInfo));
+
+                return funcPtr;
+            }
+        }
+
+        /// <summary>
+        /// Updates the protection of the pages covered by the range that was just written.
+        /// </summary>
+        private static void ReprotectRange(int offset, int size)
+        {
+            // Map pages that are already full as RX.
+            // Map pages that are not full yet as RWX.
+            // On unix, the address must be page aligned.
+            int endOffs = offset + size;
+
+            int pageStart = offset  & ~PageMask;
+            int pageEnd   = endOffs & ~PageMask;
+
+            int fullPagesSize = pageEnd - pageStart;
+
+            if (fullPagesSize != 0)
+            {
+                IntPtr funcPtr = _basePointer + pageStart;
+
+                MemoryManagement.Reprotect(funcPtr, (ulong)fullPagesSize, MemoryProtection.ReadAndExecute);
+            }
+
+            // Bytes used in the last, partially filled page.
+            int remaining = endOffs - pageEnd;
+
+            if (remaining != 0)
+            {
+                IntPtr funcPtr = _basePointer + pageEnd;
+
+                MemoryManagement.Reprotect(funcPtr, (ulong)remaining, MemoryProtection.ReadWriteExecute);
+            }
+        }
+
+        /// <summary>
+        /// Reserves <paramref name="codeSize"/> bytes (rounded up to the code alignment) at the
+        /// current end of the cache. Must be called with the lock held.
+        /// </summary>
+        /// <returns>Offset of the reserved range, relative to the base pointer.</returns>
+        /// <exception cref="OutOfMemoryException">The range does not fit in the cache.</exception>
+        private static int Allocate(int codeSize)
+        {
+            codeSize = checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1);
+
+            int allocOffset = _offset;
+
+            // Validate before committing: a request that does not fit must not move the
+            // allocation pointer, otherwise every later (possibly smaller) request fails too.
+            // The sum is computed in 64 bits so that it cannot wrap.
+            long newOffset = (long)allocOffset + codeSize;
+
+            if (newOffset > CacheSize)
+            {
+                throw new OutOfMemoryException();
+            }
+
+            _offset = (int)newOffset;
+
+            return allocOffset;
+        }
+
+        private static void Add(JitCacheEntry entry)
+        {
+            _cacheEntries.Add(entry);
+        }
+
+        /// <summary>
+        /// Looks for the entry whose range [Offset, Offset + Size) contains <paramref name="offset"/>.
+        /// </summary>
+        /// <returns>True when an entry was found; otherwise <paramref name="entry"/> is the default value.</returns>
+        public static bool TryFind(int offset, out JitCacheEntry entry)
+        {
+            lock (_lock)
+            {
+                foreach (JitCacheEntry cacheEntry in _cacheEntries)
+                {
+                    int endOffset = cacheEntry.Offset + cacheEntry.Size;
+
+                    if (offset >= cacheEntry.Offset && offset < endOffset)
+                    {
+                        entry = cacheEntry;
+
+                        return true;
+                    }
+                }
+            }
+
+            entry = default(JitCacheEntry);
+
+            return false;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/JitCacheEntry.cs b/ARMeilleure/Translation/JitCacheEntry.cs
new file mode 100644
index 000000000..87d020e68
--- /dev/null
+++ b/ARMeilleure/Translation/JitCacheEntry.cs
@@ -0,0 +1,19 @@
+using ARMeilleure.CodeGen.Unwinding;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Describes one function stored in the JIT cache: where it is, how big it is,
+    /// and the unwind information that was produced for it.
+    /// </summary>
+    struct JitCacheEntry
+    {
+        // Offset of the function and its size; JitCache.Map passes the cache-relative offset
+        // and the code length in bytes.
+        public int Offset { get; }
+        public int Size   { get; }
+
+        public UnwindInfo UnwindInfo { get; }
+
+        public JitCacheEntry(int offset, int size, UnwindInfo unwindInfo)
+        {
+            Offset     = offset;
+            Size       = size;
+            UnwindInfo = unwindInfo;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/JitUnwindWindows.cs b/ARMeilleure/Translation/JitUnwindWindows.cs
new file mode 100644
index 000000000..108dc2c56
--- /dev/null
+++ b/ARMeilleure/Translation/JitUnwindWindows.cs
@@ -0,0 +1,164 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Supplies Windows x64 table-based unwind data for the code stored in the JIT cache,
+    /// through a dynamic function table callback.
+    /// </summary>
+    static class JitUnwindWindows
+    {
+        // Capacity of the unwind code array, in 16-bit slots. The handler below emits one slot
+        // per integer push, two per XMM save and three for the stack allocation.
+        // NOTE(review): presumably sized for 9 integer and 10 vector callee saved registers --
+        // confirm against the code generator.
+        private const int MaxUnwindCodesArraySize = 9 + 10 * 2 + 3;
+
+        // Function table entry handed back to the OS. All fields are offsets relative to the
+        // base address that was registered with RtlInstallFunctionTableCallback.
+        private struct RuntimeFunction
+        {
+            public uint BeginAddress;
+            public uint EndAddress;
+            public uint UnwindData;
+        }
+
+        // Unwind information header followed by the unwind code slots.
+        private struct UnwindInfo
+        {
+            public byte VersionAndFlags;
+            public byte SizeOfProlog;
+            public byte CountOfUnwindCodes;
+            public byte FrameRegister;
+            public unsafe fixed ushort UnwindCodes[MaxUnwindCodesArraySize];
+        }
+
+        private enum UnwindOperation
+        {
+            PushNonvol    = 0,
+            AllocLarge    = 1,
+            AllocSmall    = 2,
+            SetFpreg      = 3,
+            SaveNonvol    = 4,
+            SaveNonvolFar = 5,
+            SaveXmm128    = 8,
+            SaveXmm128Far = 9,
+            PushMachframe = 10
+        }
+
+        private unsafe delegate RuntimeFunction* GetRuntimeFunctionCallback(ulong controlPc, IntPtr context);
+
+        // The native function returns a one byte BOOLEAN, so the return value must be marshalled
+        // as U1; the default 4-byte BOOL marshalling would also look at undefined upper bytes.
+        [DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
+        [return: MarshalAs(UnmanagedType.U1)]
+        private static unsafe extern bool RtlInstallFunctionTableCallback(
+            ulong                      tableIdentifier,
+            ulong                      baseAddress,
+            uint                       length,
+            GetRuntimeFunctionCallback callback,
+            IntPtr                     context,
+            string                     outOfProcessCallbackDll);
+
+        // Kept in a static field so that the delegate passed to native code is never collected.
+        private static GetRuntimeFunctionCallback _getRuntimeFunctionCallback;
+
+        private static int _sizeOfRuntimeFunction;
+
+        // Both structures live at the very start of the code cache and are rewritten on every
+        // callback invocation.
+        private unsafe static RuntimeFunction* _runtimeFunction;
+
+        private unsafe static UnwindInfo* _unwindInfo;
+
+        /// <summary>
+        /// Registers the function table callback for the whole code cache range and reserves the
+        /// beginning of the cache for the structures returned by the callback.
+        /// </summary>
+        /// <param name="codeCachePointer">Base address of the code cache.</param>
+        /// <param name="codeCacheLength">Size of the code cache in bytes.</param>
+        /// <exception cref="InvalidOperationException">The OS refused to install the callback.</exception>
+        public static void InstallFunctionTableHandler(IntPtr codeCachePointer, uint codeCacheLength)
+        {
+            ulong codeCachePtr = (ulong)codeCachePointer.ToInt64();
+
+            _sizeOfRuntimeFunction = Marshal.SizeOf<RuntimeFunction>();
+
+            bool result;
+
+            unsafe
+            {
+                _runtimeFunction = (RuntimeFunction*)codeCachePointer;
+
+                _unwindInfo = (UnwindInfo*)(codeCachePointer + _sizeOfRuntimeFunction);
+
+                _getRuntimeFunctionCallback = new GetRuntimeFunctionCallback(FunctionTableHandler);
+
+                // The two low-order bits of the table identifier are required to be set.
+                result = RtlInstallFunctionTableCallback(
+                    codeCachePtr | 3,
+                    codeCachePtr,
+                    codeCacheLength,
+                    _getRuntimeFunctionCallback,
+                    codeCachePointer,
+                    null);
+            }
+
+            if (!result)
+            {
+                throw new InvalidOperationException("Failure installing function table callback.");
+            }
+        }
+
+        /// <summary>
+        /// Callback invoked by the OS to get the function table entry covering <paramref name="controlPc"/>.
+        /// Builds the unwind information of the owning JIT function into the shared buffers.
+        /// </summary>
+        /// <param name="controlPc">Address being unwound.</param>
+        /// <param name="context">Context registered at install time, which is the code cache base address.</param>
+        /// <returns>The filled function table entry, or null when no cached function owns the address.</returns>
+        private static unsafe RuntimeFunction* FunctionTableHandler(ulong controlPc, IntPtr context)
+        {
+            int offset = (int)((long)controlPc - context.ToInt64());
+
+            if (!JitCache.TryFind(offset, out JitCacheEntry funcEntry))
+            {
+                // Not found.
+                return null;
+            }
+
+            var unwindInfo = funcEntry.UnwindInfo;
+
+            int codeIndex = 0;
+
+            // Stack offset of the vector register save that is emitted first below:
+            // the fixed allocation size minus 16 bytes per saved vector register.
+            int spOffset = unwindInfo.FixedAllocSize;
+
+            foreach (var entry in unwindInfo.PushEntries)
+            {
+                if (entry.Type == RegisterType.Vector)
+                {
+                    spOffset -= 16;
+                }
+            }
+
+            // Unwind codes are emitted walking the push entries backwards:
+            // vector saves first, then the stack allocation, then the integer pushes.
+            for (int index = unwindInfo.PushEntries.Length - 1; index >= 0; index--)
+            {
+                var entry = unwindInfo.PushEntries[index];
+
+                if (entry.Type == RegisterType.Vector)
+                {
+                    ushort uwop = PackUwop(UnwindOperation.SaveXmm128, entry.StreamEndOffset, entry.Index);
+
+                    // The slot following a SaveXmm128 code holds the stack offset scaled by 16.
+                    _unwindInfo->UnwindCodes[codeIndex++] = uwop;
+                    _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(spOffset / 16);
+
+                    spOffset += 16;
+                }
+            }
+
+            // AllocLarge with operation info 1 is followed by the unscaled 32-bit size, low half first.
+            // These go right after the vector saves; writing them at fixed slots 0..2 would
+            // overwrite the codes emitted above.
+            _unwindInfo->UnwindCodes[codeIndex++] = PackUwop(UnwindOperation.AllocLarge, unwindInfo.PrologueSize, 1);
+            _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(unwindInfo.FixedAllocSize >> 0);
+            _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(unwindInfo.FixedAllocSize >> 16);
+
+            for (int index = unwindInfo.PushEntries.Length - 1; index >= 0; index--)
+            {
+                var entry = unwindInfo.PushEntries[index];
+
+                if (entry.Type == RegisterType.Integer)
+                {
+                    ushort uwop = PackUwop(UnwindOperation.PushNonvol, entry.StreamEndOffset, entry.Index);
+
+                    _unwindInfo->UnwindCodes[codeIndex++] = uwop;
+                }
+            }
+
+            _unwindInfo->VersionAndFlags    = 1;
+            _unwindInfo->SizeOfProlog       = (byte)unwindInfo.PrologueSize;
+            _unwindInfo->CountOfUnwindCodes = (byte)codeIndex;
+            _unwindInfo->FrameRegister      = 0;
+
+            // The unwind data is placed right after the function table entry (see InstallFunctionTableHandler).
+            _runtimeFunction->BeginAddress = (uint)funcEntry.Offset;
+            _runtimeFunction->EndAddress   = (uint)(funcEntry.Offset + funcEntry.Size);
+            _runtimeFunction->UnwindData   = (uint)_sizeOfRuntimeFunction;
+
+            return _runtimeFunction;
+        }
+
+        // Packs an unwind code slot: prolog offset in bits 0-7, operation in bits 8-11
+        // and operation info in bits 12-15.
+        private static ushort PackUwop(UnwindOperation uwop, int prologOffset, int opInfo)
+        {
+            return (ushort)(prologOffset | ((int)uwop << 8) | (opInfo << 12));
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/PriorityQueue.cs b/ARMeilleure/Translation/PriorityQueue.cs
new file mode 100644
index 000000000..ab593dc07
--- /dev/null
+++ b/ARMeilleure/Translation/PriorityQueue.cs
@@ -0,0 +1,39 @@
+using System.Collections.Concurrent;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Set of FIFO queues, one per priority level, where the lowest priority index is served first.
+    /// </summary>
+    class PriorityQueue<T>
+    {
+        private ConcurrentQueue<T>[] _queues;
+
+        /// <summary>Creates the queue with <paramref name="priorities"/> priority levels.</summary>
+        public PriorityQueue(int priorities)
+        {
+            _queues = new ConcurrentQueue<T>[priorities];
+
+            int level = 0;
+
+            while (level < priorities)
+            {
+                _queues[level++] = new ConcurrentQueue<T>();
+            }
+        }
+
+        /// <summary>Adds <paramref name="value"/> to the end of the queue of the given priority level.</summary>
+        public void Enqueue(int priority, T value) => _queues[priority].Enqueue(value);
+
+        /// <summary>
+        /// Removes the oldest value of the first non-empty level, scanning levels from index 0 upwards.
+        /// </summary>
+        /// <returns>True when a value was removed; otherwise <paramref name="value"/> is the default of T.</returns>
+        public bool TryDequeue(out T value)
+        {
+            foreach (ConcurrentQueue<T> queue in _queues)
+            {
+                if (queue.TryDequeue(out value))
+                {
+                    return true;
+                }
+            }
+
+            value = default(T);
+
+            return false;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/RegisterToLocal.cs b/ARMeilleure/Translation/RegisterToLocal.cs
new file mode 100644
index 000000000..aa9180182
--- /dev/null
+++ b/ARMeilleure/Translation/RegisterToLocal.cs
@@ -0,0 +1,52 @@
+using ARMeilleure.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Pass that replaces every register operand of a control flow graph with a local operand.
+    /// </summary>
+    static class RegisterToLocal
+    {
+        /// <summary>
+        /// Rewrites the destination and the sources of every node of <paramref name="cfg"/> so that each
+        /// distinct register is replaced, everywhere, by one and the same local.
+        /// </summary>
+        public static void Rename(ControlFlowGraph cfg)
+        {
+            var localsByRegister = new Dictionary<Register, Operand>();
+
+            // Returns the local standing in for the register of the operand,
+            // creating it with the operand's type the first time the register is seen.
+            Operand ToLocal(Operand regOperand)
+            {
+                Register key = regOperand.GetRegister();
+
+                if (localsByRegister.TryGetValue(key, out Operand known))
+                {
+                    return known;
+                }
+
+                Operand created = Local(regOperand.Type);
+
+                localsByRegister.Add(key, created);
+
+                return created;
+            }
+
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                foreach (Node node in block.Operations)
+                {
+                    Operand dest = node.Destination;
+
+                    if (dest?.Kind == OperandKind.Register)
+                    {
+                        node.Destination = ToLocal(dest);
+                    }
+
+                    for (int srcIndex = 0; srcIndex < node.SourcesCount; srcIndex++)
+                    {
+                        Operand source = node.GetSource(srcIndex);
+
+                        if (source.Kind != OperandKind.Register)
+                        {
+                            continue;
+                        }
+
+                        node.SetSource(srcIndex, ToLocal(source));
+                    }
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/RegisterUsage.cs b/ARMeilleure/Translation/RegisterUsage.cs
new file mode 100644
index 000000000..4164786b9
--- /dev/null
+++ b/ARMeilleure/Translation/RegisterUsage.cs
@@ -0,0 +1,413 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using System;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Pass that works out which registers each block reads and writes, and replaces the
+    /// context load/store markers with explicit loads from and stores to the native context.
+    /// </summary>
+    static class RegisterUsage
+    {
+        // Bits 9 to 15 of the integer mask, i.e. integer registers 9 to 15.
+        private const long CallerSavedIntRegistersMask = 0x7fL  << 9;
+        // Bits 60 to 63 of the integer mask. Flags live in the upper 32 bits of that mask
+        // (see GetMask), so these are the flags with index 28 to 31.
+        private const long PStateNzcvFlagsMask         = 0xfL   << 60;
+
+        // Bits 16 to 31 of the vector mask, i.e. vector registers 16 to 31.
+        private const long CallerSavedVecRegistersMask = 0xffffL << 16;
+
+        private const int RegsCount = 32;
+        private const int RegsMask  = RegsCount - 1;
+
+        // Pair of bit sets, one bit per register. IntMask holds the integer registers in its
+        // low 32 bits and the flags in its high 32 bits; VecMask holds the vector registers.
+        private struct RegisterMask : IEquatable<RegisterMask>
+        {
+            public long IntMask { get; set; }
+            public long VecMask { get; set; }
+
+            public RegisterMask(long intMask, long vecMask)
+            {
+                IntMask = intMask;
+                VecMask = vecMask;
+            }
+
+            public static RegisterMask operator &(RegisterMask x, RegisterMask y)
+            {
+                return new RegisterMask(x.IntMask & y.IntMask, x.VecMask & y.VecMask);
+            }
+
+            public static RegisterMask operator |(RegisterMask x, RegisterMask y)
+            {
+                return new RegisterMask(x.IntMask | y.IntMask, x.VecMask | y.VecMask);
+            }
+
+            public static RegisterMask operator ~(RegisterMask x)
+            {
+                return new RegisterMask(~x.IntMask, ~x.VecMask);
+            }
+
+            public static bool operator ==(RegisterMask x, RegisterMask y)
+            {
+                return x.Equals(y);
+            }
+
+            public static bool operator !=(RegisterMask x, RegisterMask y)
+            {
+                return !x.Equals(y);
+            }
+
+            public override bool Equals(object obj)
+            {
+                return obj is RegisterMask regMask && Equals(regMask);
+            }
+
+            public bool Equals(RegisterMask other)
+            {
+                return IntMask == other.IntMask && VecMask == other.VecMask;
+            }
+
+            public override int GetHashCode()
+            {
+                return HashCode.Combine(IntMask, VecMask);
+            }
+        }
+
+        /// <summary>
+        /// Computes the register inputs and outputs of every block of <paramref name="cfg"/>, then
+        /// inserts the register loads at the entry block and at blocks starting with a context load
+        /// marker, and the register stores at blocks ending with a return or a context store marker.
+        /// </summary>
+        /// <param name="cfg">Graph to analyze and modify.</param>
+        /// <param name="isCompleteFunction">
+        /// Forwarded to StoreLocals, where (together with Optimizations.AssumeStrictAbiCompliance)
+        /// it allows skipping the stores of caller saved registers.
+        /// </param>
+        public static void RunPass(ControlFlowGraph cfg, bool isCompleteFunction)
+        {
+            // Compute local register inputs and outputs used inside blocks.
+            RegisterMask[] localInputs  = new RegisterMask[cfg.Blocks.Count];
+            RegisterMask[] localOutputs = new RegisterMask[cfg.Blocks.Count];
+
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                foreach (Node node in block.Operations)
+                {
+                    // NOTE(review): `as` yields null for a node that is not an Operation and the
+                    // result is dereferenced unchecked below -- confirm that only Operation nodes
+                    // can exist when this pass runs.
+                    Operation operation = node as Operation;
+
+                    for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+                    {
+                        Operand source = operation.GetSource(srcIndex);
+
+                        if (source.Kind != OperandKind.Register)
+                        {
+                            continue;
+                        }
+
+                        Register register = source.GetRegister();
+
+                        // A register is a local input only when it is read before the block wrote it.
+                        localInputs[block.Index] |= GetMask(register) & ~localOutputs[block.Index];
+                    }
+
+                    if (operation.Destination != null && operation.Destination.Kind == OperandKind.Register)
+                    {
+                        localOutputs[block.Index] |= GetMask(operation.Destination.GetRegister());
+                    }
+                }
+            }
+
+            // Compute global register inputs and outputs used across blocks.
+            // globalCmnOutputs: intersection over the predecessors, i.e. registers that every
+            // predecessor lists among its local or common outputs.
+            RegisterMask[] globalCmnOutputs = new RegisterMask[cfg.Blocks.Count];
+
+            RegisterMask[] globalInputs  = new RegisterMask[cfg.Blocks.Count];
+            RegisterMask[] globalOutputs = new RegisterMask[cfg.Blocks.Count];
+
+            bool modified;
+
+            bool firstPass = true;
+
+            // Iterate until no mask changes anymore.
+            do
+            {
+                modified = false;
+
+                // Compute register outputs.
+                // PostOrderBlocks is walked from its last element to its first here.
+                for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+                {
+                    BasicBlock block = cfg.PostOrderBlocks[index];
+
+                    if (block.Predecessors.Count != 0 && !HasContextLoad(block))
+                    {
+                        BasicBlock predecessor = block.Predecessors[0];
+
+                        RegisterMask cmnOutputs = localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];
+
+                        RegisterMask outputs = globalOutputs[predecessor.Index];
+
+                        for (int pIndex = 1; pIndex < block.Predecessors.Count; pIndex++)
+                        {
+                            predecessor = block.Predecessors[pIndex];
+
+                            cmnOutputs &= localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];
+
+                            outputs |= globalOutputs[predecessor.Index];
+                        }
+
+                        // Registers written by some predecessors only become inputs of this block.
+                        globalInputs[block.Index] |= outputs & ~cmnOutputs;
+
+                        // The common outputs array starts zeroed, so intersecting with the previous
+                        // value is only meaningful once the first pass produced one.
+                        if (!firstPass)
+                        {
+                            cmnOutputs &= globalCmnOutputs[block.Index];
+                        }
+
+                        if (Exchange(globalCmnOutputs, block.Index, cmnOutputs))
+                        {
+                            modified = true;
+                        }
+
+                        outputs |= localOutputs[block.Index];
+
+                        if (Exchange(globalOutputs, block.Index, globalOutputs[block.Index] | outputs))
+                        {
+                            modified = true;
+                        }
+                    }
+                    // Blocks without predecessors, or starting with a context load, do not inherit
+                    // anything: their global outputs are just their local outputs.
+                    else if (Exchange(globalOutputs, block.Index, localOutputs[block.Index]))
+                    {
+                        modified = true;
+                    }
+                }
+
+                // Compute register inputs.
+                // PostOrderBlocks is walked from its first element to its last here.
+                for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+                {
+                    BasicBlock block = cfg.PostOrderBlocks[index];
+
+                    RegisterMask inputs = localInputs[block.Index];
+
+                    if (block.Next != null)
+                    {
+                        inputs |= globalInputs[block.Next.Index];
+                    }
+
+                    if (block.Branch != null)
+                    {
+                        inputs |= globalInputs[block.Branch.Index];
+                    }
+
+                    // Registers that all predecessors already wrote need not be taken as inputs.
+                    inputs &= ~globalCmnOutputs[block.Index];
+
+                    if (Exchange(globalInputs, block.Index, globalInputs[block.Index] | inputs))
+                    {
+                        modified = true;
+                    }
+                }
+
+                firstPass = false;
+            }
+            while (modified);
+
+            // Insert load and store context instructions where needed.
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                bool hasContextLoad = HasContextLoad(block);
+
+                if (hasContextLoad)
+                {
+                    // Drop the marker; the actual loads are inserted below.
+                    block.Operations.RemoveFirst();
+                }
+
+                // The only block without any predecessor should be the entry block.
+                // It always needs a context load as it is the first block to run.
+                if (block.Predecessors.Count == 0 || hasContextLoad)
+                {
+                    LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector);
+                    LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer);
+                }
+
+                bool hasContextStore = HasContextStore(block);
+
+                if (hasContextStore)
+                {
+                    // Drop the marker; the actual stores are appended below.
+                    block.Operations.RemoveLast();
+                }
+
+                if (EndsWithReturn(block) || hasContextStore)
+                {
+                    StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, isCompleteFunction);
+                    StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector,  isCompleteFunction);
+                }
+            }
+        }
+
+        // True when the first operation of the block is a LoadFromContext without sources.
+        private static bool HasContextLoad(BasicBlock block)
+        {
+            return StartsWith(block, Instruction.LoadFromContext) && block.Operations.First.Value.SourcesCount == 0;
+        }
+
+        // True when the last operation of the block is a StoreToContext without sources.
+        private static bool HasContextStore(BasicBlock block)
+        {
+            return EndsWith(block, Instruction.StoreToContext) && block.GetLastOp().SourcesCount == 0;
+        }
+
+        // True when the block is not empty and its first node is an Operation with the given instruction.
+        private static bool StartsWith(BasicBlock block, Instruction inst)
+        {
+            if (block.Operations.Count == 0)
+            {
+                return false;
+            }
+
+            return block.Operations.First.Value is Operation operation && operation.Instruction == inst;
+        }
+
+        // True when the block is not empty and its last node is an Operation with the given instruction.
+        private static bool EndsWith(BasicBlock block, Instruction inst)
+        {
+            if (block.Operations.Count == 0)
+            {
+                return false;
+            }
+
+            return block.Operations.Last.Value is Operation operation && operation.Instruction == inst;
+        }
+
+        // Builds the single-bit mask of a register: integer register N is bit N of the integer
+        // mask, flag N is bit 32 + N of the integer mask, vector register N is bit N of the vector mask.
+        private static RegisterMask GetMask(Register register)
+        {
+            long intMask = 0;
+            long vecMask = 0;
+
+            switch (register.Type)
+            {
+                case RegisterType.Flag:    intMask = (1L << RegsCount) << register.Index; break;
+                case RegisterType.Integer: intMask =  1L               << register.Index; break;
+                case RegisterType.Vector:  vecMask =  1L               << register.Index; break;
+            }
+
+            return new RegisterMask(intMask, vecMask);
+        }
+
+        // Stores the value at the given block index and tells whether it differs from the previous one.
+        private static bool Exchange(RegisterMask[] masks, int blkIndex, RegisterMask value)
+        {
+            RegisterMask oldValue = masks[blkIndex];
+
+            masks[blkIndex] = value;
+
+            return oldValue != value;
+        }
+
+        // Prepends, for every bit set in the mask, an address computation and a load of the register
+        // from the native context, whose pointer is taken from argument 0.
+        private static void LoadLocals(BasicBlock block, long inputs, RegisterType baseType)
+        {
+            Operand arg0 = Local(OperandType.I64);
+
+            // Everything is inserted at the front of the block, so the operations are added in
+            // reverse: highest bit first, the load before its address computation, and the
+            // argument load (which must end up first) last.
+            for (int bit = 63; bit >= 0; bit--)
+            {
+                long mask = 1L << bit;
+
+                if ((inputs & mask) == 0)
+                {
+                    continue;
+                }
+
+                Operand dest = GetRegFromBit(bit, baseType);
+
+                long offset = NativeContext.GetRegisterOffset(dest.GetRegister());
+
+                Operand addr = Local(OperandType.I64);
+
+                Operation loadOp = new Operation(Instruction.Load, dest, addr);
+
+                block.Operations.AddFirst(loadOp);
+
+                Operation calcOffsOp = new Operation(Instruction.Add, addr, arg0, Const(offset));
+
+                block.Operations.AddFirst(calcOffsOp);
+            }
+
+            Operation loadArg0 = new Operation(Instruction.LoadArgument, arg0, Const(0));
+
+            block.Operations.AddFirst(loadArg0);
+        }
+
+        // Appends, for every bit set in the mask, an address computation and a store of the register
+        // to the native context, whose pointer is taken from argument 0.
+        private static void StoreLocals(BasicBlock block, long outputs, RegisterType baseType, bool isCompleteFunction)
+        {
+            // With strict ABI compliance assumed, the caller saved registers of a complete
+            // function are removed from the mask and therefore not stored.
+            if (Optimizations.AssumeStrictAbiCompliance && isCompleteFunction)
+            {
+                if (baseType == RegisterType.Integer || baseType == RegisterType.Flag)
+                {
+                    outputs = ClearCallerSavedIntRegs(outputs);
+                }
+                else /* if (baseType == RegisterType.Vector) */
+                {
+                    outputs = ClearCallerSavedVecRegs(outputs);
+                }
+            }
+
+            Operand arg0 = Local(OperandType.I64);
+
+            Operation loadArg0 = new Operation(Instruction.LoadArgument, arg0, Const(0));
+
+            block.Append(loadArg0);
+
+            for (int bit = 0; bit < 64; bit++)
+            {
+                long mask = 1L << bit;
+
+                if ((outputs & mask) == 0)
+                {
+                    continue;
+                }
+
+                Operand source = GetRegFromBit(bit, baseType);
+
+                long offset = NativeContext.GetRegisterOffset(source.GetRegister());
+
+                Operand addr = Local(OperandType.I64);
+
+                Operation calcOffsOp = new Operation(Instruction.Add, addr, arg0, Const(offset));
+
+                block.Append(calcOffsOp);
+
+                Operation storeOp = new Operation(Instruction.Store, null, addr, source);
+
+                block.Append(storeOp);
+            }
+        }
+
+        // Inverse of GetMask: bits below 32 are registers of the base type; bits 32 and above are
+        // only valid for the integer mask, where they map to the flag with index (bit - 32).
+        private static Operand GetRegFromBit(int bit, RegisterType baseType)
+        {
+            if (bit < RegsCount)
+            {
+                return new Operand(bit, baseType, GetOperandType(baseType));
+            }
+            else if (baseType == RegisterType.Integer)
+            {
+                return new Operand(bit & RegsMask, RegisterType.Flag, OperandType.I32);
+            }
+            else
+            {
+                throw new ArgumentOutOfRangeException(nameof(bit));
+            }
+        }
+
+        // Operand type used for the registers of each register type.
+        private static OperandType GetOperandType(RegisterType type)
+        {
+            switch (type)
+            {
+                case RegisterType.Flag:    return OperandType.I32;
+                case RegisterType.Integer: return OperandType.I64;
+                case RegisterType.Vector:  return OperandType.V128;
+            }
+
+            throw new ArgumentException($"Invalid register type \"{type}\".");
+        }
+
+        // True when the last node of the block is an Operation with the Return instruction.
+        private static bool EndsWithReturn(BasicBlock block)
+        {
+            if (!(block.GetLastOp() is Operation operation))
+            {
+                return false;
+            }
+
+            return operation.Instruction == Instruction.Return;
+        }
+
+        // Removes integer registers 9 to 15 and flags 28 to 31 from an integer mask.
+        private static long ClearCallerSavedIntRegs(long mask)
+        {
+            // TODO: ARM32 support.
+            mask &= ~(CallerSavedIntRegistersMask | PStateNzcvFlagsMask);
+
+            return mask;
+        }
+
+        // Removes vector registers 16 to 31 from a vector mask.
+        private static long ClearCallerSavedVecRegs(long mask)
+        {
+            // TODO: ARM32 support.
+            mask &= ~CallerSavedVecRegistersMask;
+
+            return mask;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/SsaConstruction.cs b/ARMeilleure/Translation/SsaConstruction.cs
new file mode 100644
index 000000000..ccf525915
--- /dev/null
+++ b/ARMeilleure/Translation/SsaConstruction.cs
@@ -0,0 +1,293 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using System.Collections.Generic;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+    static partial class Ssa
+    {
+        // Per-block bookkeeping used by Construct: the operand recorded as the definition
+        // of each register on the block, and the set of registers that need a phi node there.
+        private class DefMap
+        {
+            private readonly Dictionary<Register, Operand> _map = new Dictionary<Register, Operand>();
+
+            // One bit per register, indexed with GetIdFromRegister.
+            private readonly BitMap _phiMasks = new BitMap(RegisterConsts.TotalCount);
+
+            // Records operand as the definition of reg, unless one was already recorded.
+            // Returns true when the definition was added.
+            public bool TryAddOperand(Register reg, Operand operand) => _map.TryAdd(reg, operand);
+
+            // Gets the definition recorded for reg, if there's one.
+            public bool TryGetOperand(Register reg, out Operand operand) => _map.TryGetValue(reg, out operand);
+
+            // Flags reg as needing a phi node on this block, returning the result of BitMap.Set
+            // (Construct treats true as "the flag was not set before").
+            public bool AddPhi(Register reg) => _phiMasks.Set(GetIdFromRegister(reg));
+
+            // Tells whether reg was flagged as needing a phi node on this block.
+            public bool HasPhi(Register reg) => _phiMasks.IsSet(GetIdFromRegister(reg));
+        }
+
+        // Rewrites the register operands used by the operations of the CFG into local
+        // variables, inserting phi nodes where needed.
+        //
+        // This is done in two passes over all blocks:
+        //  - The first pass replaces every register destination with a new local, renames
+        //    the register sources that have a definition earlier on the same block, records
+        //    the last definition of each register on the block, and flags the blocks that
+        //    need a phi node for the register (using the dominance frontiers).
+        //  - The second pass resolves the register sources that are still left, which are
+        //    those defined on another block, with FindDef (that materializes the phi nodes
+        //    on demand).
+        public static void Construct(ControlFlowGraph cfg)
+        {
+            // One definition map per block, indexed by the block index.
+            DefMap[] globalDefs = new DefMap[cfg.Blocks.Count];
+
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                globalDefs[block.Index] = new DefMap();
+            }
+
+            // Work list used to propagate the phi flags through the dominance frontiers.
+            Queue<BasicBlock> dfPhiBlocks = new Queue<BasicBlock>();
+
+            // First pass, get all defs and locals uses.
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                // Latest local defining each register on this block (null when not defined here yet).
+                Operand[] localDefs = new Operand[RegisterConsts.TotalCount];
+
+                LinkedListNode<Node> node = block.Operations.First;
+
+                // Replaces a register operand with its latest definition on this block, if any.
+                // Registers without a local definition are left as is for the second pass.
+                Operand RenameLocal(Operand operand)
+                {
+                    if (operand != null && operand.Kind == OperandKind.Register)
+                    {
+                        Operand local = localDefs[GetIdFromRegister(operand.GetRegister())];
+
+                        operand = local ?? operand;
+                    }
+
+                    return operand;
+                }
+
+                while (node != null)
+                {
+                    if (node.Value is Operation operation)
+                    {
+                        // Sources are renamed before the destination, so an operation that
+                        // reads and writes the same register uses the previous definition.
+                        for (int index = 0; index < operation.SourcesCount; index++)
+                        {
+                            operation.SetSource(index, RenameLocal(operation.GetSource(index)));
+                        }
+
+                        Operand dest = operation.Destination;
+
+                        if (dest != null && dest.Kind == OperandKind.Register)
+                        {
+                            // Each write to a register gets its own new local.
+                            Operand local = Local(dest.Type);
+
+                            localDefs[GetIdFromRegister(dest.GetRegister())] = local;
+
+                            operation.Destination = local;
+                        }
+                    }
+
+                    node = node.Next;
+                }
+
+                for (int index = 0; index < RegisterConsts.TotalCount; index++)
+                {
+                    Operand local = localDefs[index];
+
+                    if (local == null)
+                    {
+                        // Register not defined on this block.
+                        continue;
+                    }
+
+                    Register reg = GetRegisterFromId(index);
+
+                    // The last definition on the block is the one visible to other blocks.
+                    globalDefs[block.Index].TryAddOperand(reg, local);
+
+                    dfPhiBlocks.Enqueue(block);
+
+                    // Flag every block on the dominance frontier of this block as needing a
+                    // phi node for the register. A newly flagged block is enqueued too, so
+                    // that its own dominance frontier is also processed.
+                    while (dfPhiBlocks.TryDequeue(out BasicBlock dfPhiBlock))
+                    {
+                        foreach (BasicBlock domFrontier in dfPhiBlock.DominanceFrontiers)
+                        {
+                            if (globalDefs[domFrontier.Index].AddPhi(reg))
+                            {
+                                dfPhiBlocks.Enqueue(domFrontier);
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Second pass, rename variables with definitions on different blocks.
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                // Definitions already looked up for this block, so FindDef runs at most
+                // once per register and block.
+                Operand[] localDefs = new Operand[RegisterConsts.TotalCount];
+
+                LinkedListNode<Node> node = block.Operations.First;
+
+                // Replaces a register operand with the definition found by FindDef.
+                Operand RenameGlobal(Operand operand)
+                {
+                    if (operand != null && operand.Kind == OperandKind.Register)
+                    {
+                        int key = GetIdFromRegister(operand.GetRegister());
+
+                        Operand local = localDefs[key];
+
+                        if (local == null)
+                        {
+                            local = FindDef(globalDefs, block, operand);
+
+                            localDefs[key] = local;
+                        }
+
+                        operand = local;
+                    }
+
+                    return operand;
+                }
+
+                while (node != null)
+                {
+                    if (node.Value is Operation operation)
+                    {
+                        for (int index = 0; index < operation.SourcesCount; index++)
+                        {
+                            operation.SetSource(index, RenameGlobal(operation.GetSource(index)));
+                        }
+                    }
+
+                    node = node.Next;
+                }
+            }
+        }
+
+        // Finds the value that the register held by operand has at the start of the
+        // current block. If the block was flagged as needing a phi node for the register,
+        // the phi is inserted and its result is used. Otherwise, the search continues on
+        // the immediate dominator. A block that is its own immediate dominator has nowhere
+        // else to look at, so the result is Undef() in that case.
+        private static Operand FindDef(DefMap[] globalDefs, BasicBlock current, Operand operand)
+        {
+            Register reg = operand.GetRegister();
+
+            bool needsPhi = globalDefs[current.Index].HasPhi(reg);
+
+            if (needsPhi)
+            {
+                return InsertPhi(globalDefs, current, operand);
+            }
+
+            BasicBlock dominator = current.ImmediateDominator;
+
+            if (current == dominator)
+            {
+                return Undef();
+            }
+
+            return FindDefOnPred(globalDefs, dominator, operand);
+        }
+
+        // Looks for the value that the register held by operand has at the end of the
+        // current block, walking up through the immediate dominators. On each visited block,
+        // a recorded definition is preferred, and a pending phi node is materialized and
+        // used otherwise. The walk ends with Undef() when it reaches a block that is its
+        // own immediate dominator without finding either.
+        private static Operand FindDefOnPred(DefMap[] globalDefs, BasicBlock current, Operand operand)
+        {
+            Register reg = operand.GetRegister();
+
+            while (true)
+            {
+                DefMap defMap = globalDefs[current.Index];
+
+                if (defMap.TryGetOperand(reg, out Operand lastDef))
+                {
+                    return lastDef;
+                }
+
+                if (defMap.HasPhi(reg))
+                {
+                    return InsertPhi(globalDefs, current, operand);
+                }
+
+                BasicBlock dominator = current.ImmediateDominator;
+
+                if (dominator == current)
+                {
+                    // Nothing above this block, the register has no definition.
+                    return Undef();
+                }
+
+                current = dominator;
+            }
+        }
+
+        // Creates the phi node for the register held by operand at the start of the block,
+        // with one source per predecessor of the block, and returns the local that the phi
+        // defines. The source of each predecessor is found with FindDefOnPred, which may
+        // insert more phi nodes on other blocks.
+        private static Operand InsertPhi(DefMap[] globalDefs, BasicBlock block, Operand operand)
+        {
+            // This block has a Phi that has not been materialized yet, but that
+            // would define a new version of the variable we're looking for. We need
+            // to materialize the Phi, add all the block/operand pairs into the Phi, and
+            // then use the definition from that Phi.
+            Operand local = Local(operand.Type);
+
+            PhiNode phi = new PhiNode(local, block.Predecessors.Count);
+
+            AddPhi(block, phi);
+
+            // The definition must be recorded before the predecessors are visited below:
+            // FindDefOnPred checks the recorded definitions before the phi flag, so a search
+            // that comes back to this block (on a loop, for example) finds this local rather
+            // than inserting the phi again. TryAddOperand keeps the existing entry if the
+            // block already has a definition for the register.
+            globalDefs[block.Index].TryAddOperand(operand.GetRegister(), local);
+
+            for (int index = 0; index < block.Predecessors.Count; index++)
+            {
+                BasicBlock predecessor = block.Predecessors[index];
+
+                phi.SetBlock(index, predecessor);
+                phi.SetSource(index, FindDefOnPred(globalDefs, predecessor, operand));
+            }
+
+            return local;
+        }
+
+        // Adds the phi node to the block, keeping the phi nodes together at the start of
+        // the operation list: the new phi goes right after the existing phi nodes, or
+        // becomes the first node when the block has none (or has no nodes at all).
+        private static void AddPhi(BasicBlock block, PhiNode phi)
+        {
+            LinkedListNode<Node> last = block.Operations.First;
+
+            // Advance while the following node is still a phi.
+            while (last != null && last.Next != null && last.Next.Value is PhiNode)
+            {
+                last = last.Next;
+            }
+
+            bool endsOnPhi = last != null && last.Value is PhiNode;
+
+            if (!endsOnPhi)
+            {
+                block.Operations.AddFirst(phi);
+            }
+            else
+            {
+                block.Operations.AddAfter(last, phi);
+            }
+        }
+
+        // Flattens a register into a single index: integer registers use their own index,
+        // vector registers are offset by RegisterConsts.IntRegsCount, and registers of any
+        // other type (flags) are offset by RegisterConsts.IntAndVecRegsCount.
+        // GetRegisterFromId does the inverse mapping.
+        private static int GetIdFromRegister(Register reg)
+        {
+            switch (reg.Type)
+            {
+                case RegisterType.Integer:
+                    return reg.Index;
+
+                case RegisterType.Vector:
+                    return RegisterConsts.IntRegsCount + reg.Index;
+
+                default: /* RegisterType.Flag */
+                    return RegisterConsts.IntAndVecRegsCount + reg.Index;
+            }
+        }
+
+        // Inverse of GetIdFromRegister: ids below RegisterConsts.IntRegsCount are integer
+        // registers, ids below RegisterConsts.IntAndVecRegsCount are vector registers, and
+        // the remaining ids are flags.
+        private static Register GetRegisterFromId(int id)
+        {
+            if (id < RegisterConsts.IntRegsCount)
+            {
+                return new Register(id, RegisterType.Integer);
+            }
+
+            if (id < RegisterConsts.IntAndVecRegsCount)
+            {
+                int vecIndex = id - RegisterConsts.IntRegsCount;
+
+                return new Register(vecIndex, RegisterType.Vector);
+            }
+
+            /* id < RegisterConsts.TotalCount */
+            int flagIndex = id - RegisterConsts.IntAndVecRegsCount;
+
+            return new Register(flagIndex, RegisterType.Flag);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/SsaDeconstruction.cs b/ARMeilleure/Translation/SsaDeconstruction.cs
new file mode 100644
index 000000000..2ba78bdf4
--- /dev/null
+++ b/ARMeilleure/Translation/SsaDeconstruction.cs
@@ -0,0 +1,46 @@
+using ARMeilleure.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+    static partial class Ssa
+    {
+        // Removes the phi nodes found at the start of each block of the CFG, replacing
+        // them with copies. For each phi, a new local is created, a copy of the matching
+        // source into that local is appended to each predecessor block of the phi, and a
+        // copy from that local into the phi destination takes the place of the phi itself.
+        public static void Deconstruct(ControlFlowGraph cfg)
+        {
+            foreach (BasicBlock block in cfg.Blocks)
+            {
+                LinkedListNode<Node> node = block.Operations.First;
+
+                // Only the leading run of phi nodes is processed, the loop stops at the
+                // first node that is not a phi.
+                while (node?.Value is PhiNode phi)
+                {
+                    // Saved now because the current node is removed from the list below.
+                    LinkedListNode<Node> nextNode = node.Next;
+
+                    Operand local = Local(phi.Destination.Type);
+
+                    for (int index = 0; index < phi.SourcesCount; index++)
+                    {
+                        BasicBlock predecessor = phi.GetBlock(index);
+
+                        Operand source = phi.GetSource(index);
+
+                        predecessor.Append(new Operation(Instruction.Copy, local, source));
+
+                        // NOTE(review): presumably detaches the source from the phi, so the
+                        // removed phi is no longer seen as one of its uses - confirm on Node.
+                        phi.SetSource(index, null);
+                    }
+
+                    Operation copyOp = new Operation(Instruction.Copy, phi.Destination, local);
+
+                    block.Operations.AddBefore(node, copyOp);
+
+                    // NOTE(review): presumably detaches the destination from the phi, leaving
+                    // the copy above as its only definition - confirm on Node.
+                    phi.Destination = null;
+
+                    block.Operations.Remove(node);
+
+                    node = nextNode;
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/TranslatedFunction.cs b/ARMeilleure/Translation/TranslatedFunction.cs
new file mode 100644
index 000000000..06069cf8f
--- /dev/null
+++ b/ARMeilleure/Translation/TranslatedFunction.cs
@@ -0,0 +1,30 @@
+using System.Threading;
+
+namespace ARMeilleure.Translation
+{
+    // A compiled guest function, along with the call counter that is used to decide
+    // when the function should be queued to be compiled again.
+    class TranslatedFunction
+    {
+        // Call count at which ShouldRejit returns true.
+        private const int MinCallsForRejit = 100;
+
+        private readonly GuestFunction _func;
+        private readonly bool          _rejit;
+
+        private int _callCount;
+
+        public TranslatedFunction(GuestFunction func, bool rejit)
+        {
+            _rejit = rejit;
+            _func  = func;
+        }
+
+        // Invokes the compiled function with the native context pointer of the context,
+        // and returns the value returned by the function.
+        public ulong Execute(State.ExecutionContext context) => _func(context.NativeContextPtr);
+
+        // Counts one more call, and returns true for the call that makes the count reach
+        // MinCallsForRejit. Always returns false (without counting) when the function was
+        // created with rejit disabled.
+        public bool ShouldRejit()
+        {
+            if (!_rejit)
+            {
+                return false;
+            }
+
+            int calls = Interlocked.Increment(ref _callCount);
+
+            return calls == MinCallsForRejit;
+        }
+    }
+}
\ No newline at end of file
diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs
new file mode 100644
index 000000000..6a7451214
--- /dev/null
+++ b/ARMeilleure/Translation/Translator.cs
@@ -0,0 +1,253 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.Instructions;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using System;
+using System.Collections.Concurrent;
+using System.Threading;
+
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
+
+namespace ARMeilleure.Translation
+{
+    public class Translator : ITranslator
+    {
+        private const ulong CallFlag = InstEmitFlowHelper.CallFlag;
+
+        private readonly MemoryManager _memory;
+
+        // Translated functions, keyed by the guest address with CallFlag cleared.
+        private readonly ConcurrentDictionary<ulong, TranslatedFunction> _funcs;
+
+        // Addresses of the functions waiting to be translated again by the background thread.
+        private readonly PriorityQueue<ulong> _backgroundQueue;
+
+        // Signaled when an address is queued, and when the last thread leaves Execute.
+        private readonly AutoResetEvent _backgroundTranslatorEvent;
+
+        // Number of threads currently inside Execute.
+        private volatile int _threadCount;
+
+        public Translator(MemoryManager memory)
+        {
+            _memory                    = memory;
+            _funcs                     = new ConcurrentDictionary<ulong, TranslatedFunction>();
+            _backgroundQueue           = new PriorityQueue<ulong>(2);
+            _backgroundTranslatorEvent = new AutoResetEvent(false);
+        }
+
+        // Body of the background translator thread. While there are threads inside
+        // Execute, takes addresses from the background queue, translates them as AArch64
+        // code with highCq enabled, and replaces the function stored for the address with
+        // the result. Blocks on the event when the queue has nothing to dequeue.
+        private void TranslateQueuedSubs()
+        {
+            while (_threadCount != 0)
+            {
+                if (!_backgroundQueue.TryDequeue(out ulong address))
+                {
+                    _backgroundTranslatorEvent.WaitOne();
+
+                    continue;
+                }
+
+                TranslatedFunction highCqFunc = Translate(address, ExecutionMode.Aarch64, highCq: true);
+
+                _funcs.AddOrUpdate(address, highCqFunc, (addr, previous) => highCqFunc);
+            }
+        }
+
+        // Runs guest code on the calling thread, starting at the specified address. It
+        // keeps running the function at the address returned by the previous function,
+        // until the context is no longer running, or the returned address is zero after
+        // bit 0 is cleared.
+        //
+        // The first thread to enter starts the background translator thread, and the last
+        // thread to leave signals it, so that it can see that the thread count is zero
+        // and exit. The work done on exit is inside finally blocks: if an exception was
+        // thrown while running, skipping it would leave the calling thread registered on
+        // NativeInterface, and the thread count above zero, with the background thread
+        // (a foreground thread) waiting on the event forever.
+        public void Execute(IExecutionContext ctx, ulong address)
+        {
+            State.ExecutionContext context = (State.ExecutionContext)ctx;
+
+            if (Interlocked.Increment(ref _threadCount) == 1)
+            {
+                Thread backgroundTranslatorThread = new Thread(TranslateQueuedSubs);
+
+                backgroundTranslatorThread.Priority = ThreadPriority.Lowest;
+                backgroundTranslatorThread.Start();
+            }
+
+            try
+            {
+                Statistics.InitializeTimer();
+
+                NativeInterface.RegisterThread(context, _memory);
+
+                try
+                {
+                    do
+                    {
+                        address = ExecuteSingle(context, address);
+                    }
+                    while (context.Running && (address & ~1UL) != 0);
+                }
+                finally
+                {
+                    // Only reached after RegisterThread succeeded.
+                    NativeInterface.UnregisterThread();
+                }
+            }
+            finally
+            {
+                // Always balance the increment done on entry.
+                if (Interlocked.Decrement(ref _threadCount) == 0)
+                {
+                    _backgroundTranslatorEvent.Set();
+                }
+            }
+        }
+
+        // Gets (translating it first if needed) the function at the specified address and
+        // runs it once, returning the address returned by the function. The time spent
+        // running the function is measured with Statistics.
+        public ulong ExecuteSingle(State.ExecutionContext context, ulong address)
+        {
+            TranslatedFunction target = GetOrTranslate(address, context.ExecutionMode);
+
+            Statistics.StartTimer();
+
+            ulong nextAddress = target.Execute(context);
+
+            Statistics.StopTimer(address);
+
+            return nextAddress;
+        }
+
+        // Returns the translated function for the address, translating it (with highCq
+        // disabled) and caching it when there's none yet. When the address has the
+        // CallFlag bit set, and an existing function reports that it should be compiled
+        // again, the address is also queued for the background translator thread.
+        private TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
+        {
+            // TODO: Investigate how we should handle code at unaligned addresses.
+            // Currently, those low bits are used to store special flags.
+            bool isCallTarget = (address & CallFlag) != 0;
+
+            ulong funcAddress = address & ~CallFlag;
+
+            if (_funcs.TryGetValue(funcAddress, out TranslatedFunction func))
+            {
+                if (isCallTarget && func.ShouldRejit())
+                {
+                    _backgroundQueue.Enqueue(0, funcAddress);
+
+                    _backgroundTranslatorEvent.Set();
+                }
+
+                return func;
+            }
+
+            func = Translate(funcAddress, mode, highCq: false);
+
+            // If another thread added the function first, the one just translated
+            // is still the one returned to the caller.
+            _funcs.TryAdd(funcAddress, func);
+
+            return func;
+        }
+
+        // Translates the guest code at the specified address into a host function.
+        // With highCq, the whole function is decoded (Decoder.DecodeFunction) and compiled
+        // with CompilerOptions.HighCq. Otherwise, Decoder.DecodeBasicBlock is used, the code
+        // is compiled with CompilerOptions.None, and the returned function is marked as a
+        // candidate to be translated again later.
+        private TranslatedFunction Translate(ulong address, ExecutionMode mode, bool highCq)
+        {
+            ArmEmitterContext context = new ArmEmitterContext(_memory, Aarch32Mode.User);
+
+            Logger.StartPass(PassName.Decoding);
+
+            Block[] blocks = highCq
+                ? Decoder.DecodeFunction  (_memory, address, mode)
+                : Decoder.DecodeBasicBlock(_memory, address, mode);
+
+            Logger.EndPass(PassName.Decoding);
+
+            Logger.StartPass(PassName.Translation);
+
+            // The synchronization check is the first thing emitted on the function.
+            EmitSynchronization(context);
+
+            // Code is emitted following the order of the blocks on the array. If the first
+            // one is not the block at the requested address, start by branching to it.
+            if (blocks[0].Address != address)
+            {
+                context.Branch(context.GetLabel(address));
+            }
+
+            ControlFlowGraph cfg = EmitAndGetCFG(context, blocks);
+
+            Logger.EndPass(PassName.Translation);
+
+            Logger.StartPass(PassName.RegisterUsage);
+
+            RegisterUsage.RunPass(cfg, isCompleteFunction: false);
+
+            Logger.EndPass(PassName.RegisterUsage);
+
+            // The compiled function takes a single 64-bit argument, and returns a 64-bit value.
+            OperandType[] argTypes = new OperandType[] { OperandType.I64 };
+
+            CompilerOptions options = highCq
+                ? CompilerOptions.HighCq
+                : CompilerOptions.None;
+
+            GuestFunction func = Compiler.Compile<GuestFunction>(cfg, argTypes, OperandType.I64, options);
+
+            // Functions that were already compiled with highCq are not compiled again.
+            return new TranslatedFunction(func, rejit: !highCq);
+        }
+
+        // Emits the code for all instructions of the decoded blocks, in order, and returns
+        // the control flow graph built by the context. Each block starts with the label of
+        // its address. Throws InvalidOperationException for instructions without an emitter.
+        private static ControlFlowGraph EmitAndGetCFG(ArmEmitterContext context, Block[] blocks)
+        {
+            for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
+            {
+                Block block = blocks[blkIndex];
+
+                context.CurrBlock = block;
+
+                context.MarkLabel(context.GetLabel(block.Address));
+
+                for (int opcIndex = 0; opcIndex < block.OpCodes.Count; opcIndex++)
+                {
+                    OpCode opCode = block.OpCodes[opcIndex];
+
+                    context.CurrOp = opCode;
+
+                    bool isLastOp = opcIndex == block.OpCodes.Count - 1;
+
+                    // The block ends with a branch to its own address or to a lower one (as
+                    // on loops), emit a synchronization check before the last instruction.
+                    if (isLastOp && block.Branch != null && block.Branch.Address <= block.Address)
+                    {
+                        EmitSynchronization(context);
+                    }
+
+                    Operand lblPredicateSkip = null;
+
+                    // Conditional 32-bit instruction: branch over the code emitted for the
+                    // instruction when the inverted condition holds.
+                    if (opCode is OpCode32 op && op.Cond < Condition.Al)
+                    {
+                        lblPredicateSkip = Label();
+
+                        InstEmitFlowHelper.EmitCondBranch(context, lblPredicateSkip, op.Cond.Invert());
+                    }
+
+                    if (opCode.Instruction.Emitter != null)
+                    {
+                        opCode.Instruction.Emitter(context);
+                    }
+                    else
+                    {
+                        throw new InvalidOperationException($"Invalid instruction \"{opCode.Instruction.Name}\".");
+                    }
+
+                    if (lblPredicateSkip != null)
+                    {
+                        context.MarkLabel(lblPredicateSkip);
+
+                        // If this is the last op on the block, and there's no "next" block
+                        // after this one, then we have to return right now, with the address
+                        // of the next instruction to be executed (in the case that the condition
+                        // is false, and the branch was not taken, as all basic blocks should end
+                        // with some kind of branch).
+                        if (isLastOp && block.Next == null)
+                        {
+                            context.Return(Const(opCode.Address + (ulong)opCode.OpCodeSizeInBytes));
+                        }
+                    }
+                }
+            }
+
+            return context.GetControlFlowGraph();
+        }
+
+        // Emits a synchronization check. The emitted code loads a 32-bit counter, located
+        // at NativeContext.GetCounterOffset() bytes from the address passed as argument 0
+        // of the function. If the counter is not zero, it is decremented by one and stored
+        // back. If it is zero, NativeInterface.CheckSynchronization is called instead, and
+        // the counter is not written by the emitted code.
+        private static void EmitSynchronization(EmitterContext context)
+        {
+            long countOffs = NativeContext.GetCounterOffset();
+
+            Operand countAddr = context.Add(context.LoadArgument(OperandType.I64, 0), Const(countOffs));
+
+            Operand count = context.Load(OperandType.I32, countAddr);
+
+            Operand lblNonZero = Label();
+            Operand lblExit    = Label();
+
+            // Skip the call while the counter is not zero.
+            context.BranchIfTrue(lblNonZero, count);
+
+            context.Call(new _Void(NativeInterface.CheckSynchronization));
+
+            context.Branch(lblExit);
+
+            context.MarkLabel(lblNonZero);
+
+            count = context.Subtract(count, Const(1));
+
+            context.Store(countAddr, count);
+
+            context.MarkLabel(lblExit);
+        }
+    }
+}
\ No newline at end of file
diff --git a/ChocolArm64/ChocolArm64.csproj b/ChocolArm64/ChocolArm64.csproj
index ea98003f9..cccdd94df 100644
--- a/ChocolArm64/ChocolArm64.csproj
+++ b/ChocolArm64/ChocolArm64.csproj
@@ -2,7 +2,7 @@
 
   <PropertyGroup>
     <TargetFramework>netcoreapp2.1</TargetFramework>
-    <RuntimeIdentifiers>win10-x64;osx-x64;linux-x64</RuntimeIdentifiers>
+    <RuntimeIdentifiers>win-x64;osx-x64;linux-x64</RuntimeIdentifiers>
     <Configurations>Debug;Release;Profile Debug;Profile Release</Configurations>
   </PropertyGroup>
 
@@ -33,6 +33,7 @@
 
   <ItemGroup>
     <ProjectReference Include="..\Ryujinx.Profiler\Ryujinx.Profiler.csproj" />
+    <ProjectReference Include="..\ARMeilleure\ARMeilleure.csproj" />
   </ItemGroup>
 
 </Project>
diff --git a/ChocolArm64/CpuThread.cs b/ChocolArm64/CpuThread.cs
deleted file mode 100644
index ad1fd6f3c..000000000
--- a/ChocolArm64/CpuThread.cs
+++ /dev/null
@@ -1,66 +0,0 @@
-using ChocolArm64.Memory;
-using ChocolArm64.State;
-using ChocolArm64.Translation;
-using System;
-using System.Threading;
-
-namespace ChocolArm64
-{
-    public class CpuThread
-    {
-        public CpuThreadState ThreadState { get; private set; }
-        public MemoryManager  Memory      { get; private set; }
-
-        private Translator _translator;
-
-        public Thread Work;
-
-        public event EventHandler WorkFinished;
-
-        private int _isExecuting;
-
-        public CpuThread(Translator translator, MemoryManager memory, long entrypoint)
-        {
-            _translator = translator;
-            Memory      = memory;
-
-            ThreadState = new CpuThreadState();
-
-            ThreadState.Running = true;
-
-            Work = new Thread(delegate()
-            {
-                translator.ExecuteSubroutine(this, entrypoint);
-
-                WorkFinished?.Invoke(this, EventArgs.Empty);
-            });
-        }
-
-        public bool Execute()
-        {
-            if (Interlocked.Exchange(ref _isExecuting, 1) == 1)
-            {
-                return false;
-            }
-
-            Work.Start();
-
-            return true;
-        }
-
-        public void StopExecution()
-        {
-            ThreadState.Running = false;
-        }
-
-        public void RequestInterrupt()
-        {
-            ThreadState.RequestInterrupt();
-        }
-
-        public bool IsCurrentThread()
-        {
-            return Thread.CurrentThread == Work;
-        }
-    }
-}
\ No newline at end of file
diff --git a/ChocolArm64/Instructions/InstEmitMemoryHelper.cs b/ChocolArm64/Instructions/InstEmitMemoryHelper.cs
index dbb588867..08c8265b5 100644
--- a/ChocolArm64/Instructions/InstEmitMemoryHelper.cs
+++ b/ChocolArm64/Instructions/InstEmitMemoryHelper.cs
@@ -462,11 +462,11 @@ namespace ChocolArm64.Instructions
 
             switch (size)
             {
-                case 0: fallbackMethodName = nameof(MemoryManager.WriteVector8);   break;
-                case 1: fallbackMethodName = nameof(MemoryManager.WriteVector16);  break;
-                case 2: fallbackMethodName = nameof(MemoryManager.WriteVector32);  break;
-                case 3: fallbackMethodName = nameof(MemoryManager.WriteVector64);  break;
-                case 4: fallbackMethodName = nameof(MemoryManager.WriteVector128); break;
+                case 0: fallbackMethodName = nameof(MemoryManager.WriteVector8);           break;
+                case 1: fallbackMethodName = nameof(MemoryManager.WriteVector16);          break;
+                case 2: fallbackMethodName = nameof(MemoryManager.WriteVector32);          break;
+                case 3: fallbackMethodName = nameof(MemoryManager.WriteVector64);          break;
+                case 4: fallbackMethodName = nameof(MemoryManager.WriteVector128Internal); break;
             }
 
             context.EmitCall(typeof(MemoryManager), fallbackMethodName);
diff --git a/ChocolArm64/Instructions/InstEmitSystem.cs b/ChocolArm64/Instructions/InstEmitSystem.cs
index d0d60b9d5..ac264de92 100644
--- a/ChocolArm64/Instructions/InstEmitSystem.cs
+++ b/ChocolArm64/Instructions/InstEmitSystem.cs
@@ -31,8 +31,8 @@ namespace ChocolArm64.Instructions
             {
                 case 0b11_011_0000_0000_001: propName = nameof(CpuThreadState.CtrEl0);    break;
                 case 0b11_011_0000_0000_111: propName = nameof(CpuThreadState.DczidEl0);  break;
-                case 0b11_011_0100_0100_000: propName = nameof(CpuThreadState.Fpcr);      break;
-                case 0b11_011_0100_0100_001: propName = nameof(CpuThreadState.Fpsr);      break;
+                case 0b11_011_0100_0100_000: propName = nameof(CpuThreadState.CFpcr);     break;
+                case 0b11_011_0100_0100_001: propName = nameof(CpuThreadState.CFpsr);     break;
                 case 0b11_011_1101_0000_010: propName = nameof(CpuThreadState.TpidrEl0);  break;
                 case 0b11_011_1101_0000_011: propName = nameof(CpuThreadState.Tpidr);     break;
                 case 0b11_011_1110_0000_000: propName = nameof(CpuThreadState.CntfrqEl0); break;
@@ -65,8 +65,8 @@ namespace ChocolArm64.Instructions
 
             switch (GetPackedId(op))
             {
-                case 0b11_011_0100_0100_000: propName = nameof(CpuThreadState.Fpcr);     break;
-                case 0b11_011_0100_0100_001: propName = nameof(CpuThreadState.Fpsr);     break;
+                case 0b11_011_0100_0100_000: propName = nameof(CpuThreadState.CFpcr);    break;
+                case 0b11_011_0100_0100_001: propName = nameof(CpuThreadState.CFpsr);    break;
                 case 0b11_011_1101_0000_010: propName = nameof(CpuThreadState.TpidrEl0); break;
 
                 default: throw new NotImplementedException($"Unknown MSR at {op.Position:x16}");
diff --git a/ChocolArm64/Instructions/SoftFloat.cs b/ChocolArm64/Instructions/SoftFloat.cs
index 3521ad152..e78932cc4 100644
--- a/ChocolArm64/Instructions/SoftFloat.cs
+++ b/ChocolArm64/Instructions/SoftFloat.cs
@@ -82,7 +82,7 @@ namespace ChocolArm64.Instructions
     {
         public static float FPConvert(ushort valueBits, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat16_32.FPConvert: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat16_32.FPConvert: state.Fpcr = 0x{state.CFpcr:X8}");
 
             double real = valueBits.FPUnpackCv(out FpType type, out bool sign, state);
 
@@ -322,13 +322,13 @@ namespace ChocolArm64.Instructions
         {
             int enable = (int)exc + 8;
 
-            if ((state.Fpcr & (1 << enable)) != 0)
+            if ((state.CFpcr & (1 << enable)) != 0)
             {
                 throw new NotImplementedException("Floating-point trap handling.");
             }
             else
             {
-                state.Fpsr |= 1 << (int)exc;
+                state.CFpsr |= 1 << (int)exc;
             }
         }
     }
@@ -337,7 +337,7 @@ namespace ChocolArm64.Instructions
     {
         public static ushort FPConvert(float value, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32_16.FPConvert: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32_16.FPConvert: state.Fpcr = 0x{state.CFpcr:X8}");
 
             double real = value.FPUnpackCv(out FpType type, out bool sign, out uint valueBits, state);
 
@@ -609,13 +609,13 @@ namespace ChocolArm64.Instructions
         {
             int enable = (int)exc + 8;
 
-            if ((state.Fpcr & (1 << enable)) != 0)
+            if ((state.CFpcr & (1 << enable)) != 0)
             {
                 throw new NotImplementedException("Floating-point trap handling.");
             }
             else
             {
-                state.Fpsr |= 1 << (int)exc;
+                state.CFpsr |= 1 << (int)exc;
             }
         }
     }
@@ -624,7 +624,7 @@ namespace ChocolArm64.Instructions
     {
         public static float FPAdd(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPAdd: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPAdd: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state);
@@ -672,7 +672,7 @@ namespace ChocolArm64.Instructions
 
         public static int FPCompare(float value1, float value2, bool signalNaNs, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompare: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompare: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out _, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out _, state);
@@ -709,7 +709,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPCompareEQ(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareEQ: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareEQ: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out _, out _, state);
             value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -735,7 +735,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPCompareGE(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareGE: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareGE: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out _, out _, state);
             value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -758,7 +758,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPCompareGT(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareGT: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareGT: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out _, out _, state);
             value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -782,7 +782,7 @@ namespace ChocolArm64.Instructions
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static float FPCompareLE(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareLE: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareLE: state.Fpcr = 0x{state.CFpcr:X8}");
 
             return FPCompareGE(value2, value1, state);
         }
@@ -790,14 +790,14 @@ namespace ChocolArm64.Instructions
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static float FPCompareLT(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareLT: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPCompareLT: state.Fpcr = 0x{state.CFpcr:X8}"); // Debug trace when the raw FPCR int (CFpcr) is non-zero. LT(a, b) is evaluated below as GT(b, a); FPCompareGT emits its own trace as well.
 
             return FPCompareGT(value2, value1, state);
         }
 
         public static float FPDiv(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPDiv: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state);
@@ -846,7 +846,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPMax(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMax: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMax: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state);
@@ -899,7 +899,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPMaxNum(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMaxNum: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMaxNum: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1.FPUnpack(out FpType type1, out _, out _, state);
             value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -918,7 +918,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPMin(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMin: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMin: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state);
@@ -971,7 +971,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPMinNum(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMinNum: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMinNum: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1.FPUnpack(out FpType type1, out _, out _, state);
             value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -990,7 +990,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPMul(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMul: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMul: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state);
@@ -1038,7 +1038,7 @@ namespace ChocolArm64.Instructions
             float value2,
             CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMulAdd: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMulAdd: state.Fpcr = 0x{state.CFpcr:X8}");
 
             valueA = valueA.FPUnpack(out FpType typeA, out bool signA, out uint addend, state);
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1,    state);
@@ -1108,7 +1108,7 @@ namespace ChocolArm64.Instructions
             float value2,
             CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMulSub: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMulSub: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPNeg();
 
@@ -1117,7 +1117,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPMulX(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPMulX: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPMulX: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state);
@@ -1159,7 +1159,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPRecipEstimate(float value, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecipEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRecipEstimate: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value.FPUnpack(out FpType type, out bool sign, out uint op, state);
 
@@ -1248,7 +1248,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPRecipStepFused(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecipStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRecipStepFused: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPNeg();
 
@@ -1291,7 +1291,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPRecpX(float value, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecpX: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRecpX: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value.FPUnpack(out FpType type, out bool sign, out uint op, state);
 
@@ -1315,7 +1315,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPRSqrtEstimate(float value, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRSqrtEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRSqrtEstimate: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value.FPUnpack(out FpType type, out bool sign, out uint op, state);
 
@@ -1380,7 +1380,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPRSqrtStepFused(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRSqrtStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPRSqrtStepFused: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPNeg();
 
@@ -1423,7 +1423,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPSqrt(float value, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPSqrt: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPSqrt: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value = value.FPUnpack(out FpType type, out bool sign, out uint op, state);
 
@@ -1464,7 +1464,7 @@ namespace ChocolArm64.Instructions
 
         public static float FPSub(float value1, float value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPSub: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat32.FPSub: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out uint op1, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out uint op2, state);
@@ -1693,13 +1693,13 @@ namespace ChocolArm64.Instructions
         {
             int enable = (int)exc + 8;
 
-            if ((state.Fpcr & (1 << enable)) != 0)
+            if ((state.CFpcr & (1 << enable)) != 0)
             {
                 throw new NotImplementedException("Floating-point trap handling.");
             }
             else
             {
-                state.Fpsr |= 1 << (int)exc;
+                state.CFpsr |= 1 << (int)exc;
             }
         }
     }
@@ -1708,7 +1708,7 @@ namespace ChocolArm64.Instructions
     {
         public static double FPAdd(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPAdd: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPAdd: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state);
@@ -1756,7 +1756,7 @@ namespace ChocolArm64.Instructions
 
         public static int FPCompare(double value1, double value2, bool signalNaNs, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompare: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompare: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out _, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out _, state);
@@ -1793,7 +1793,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPCompareEQ(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareEQ: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareEQ: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out _, out _, state);
             value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -1819,7 +1819,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPCompareGE(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareGE: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareGE: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out _, out _, state);
             value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -1842,7 +1842,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPCompareGT(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareGT: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareGT: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out _, out _, state);
             value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -1866,7 +1866,7 @@ namespace ChocolArm64.Instructions
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static double FPCompareLE(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareLE: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareLE: state.Fpcr = 0x{state.CFpcr:X8}"); // Debug trace when the raw FPCR int (CFpcr) is non-zero. LE(a, b) is evaluated below as GE(b, a); FPCompareGE emits its own trace as well.
 
             return FPCompareGE(value2, value1, state);
         }
@@ -1874,14 +1874,14 @@ namespace ChocolArm64.Instructions
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static double FPCompareLT(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareLT: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPCompareLT: state.Fpcr = 0x{state.CFpcr:X8}"); // Debug trace when the raw FPCR int (CFpcr) is non-zero. LT(a, b) is evaluated below as GT(b, a); FPCompareGT emits its own trace as well.
 
             return FPCompareGT(value2, value1, state);
         }
 
         public static double FPDiv(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPDiv: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state);
@@ -1930,7 +1930,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPMax(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMax: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMax: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state);
@@ -1983,7 +1983,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPMaxNum(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMaxNum: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMaxNum: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1.FPUnpack(out FpType type1, out _, out _, state);
             value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -2002,7 +2002,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPMin(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMin: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMin: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state);
@@ -2055,7 +2055,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPMinNum(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMinNum: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMinNum: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1.FPUnpack(out FpType type1, out _, out _, state);
             value2.FPUnpack(out FpType type2, out _, out _, state);
@@ -2074,7 +2074,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPMul(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMul: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMul: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state);
@@ -2122,7 +2122,7 @@ namespace ChocolArm64.Instructions
             double value2,
             CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMulAdd: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMulAdd: state.Fpcr = 0x{state.CFpcr:X8}");
 
             valueA = valueA.FPUnpack(out FpType typeA, out bool signA, out ulong addend, state);
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1,    state);
@@ -2192,7 +2192,7 @@ namespace ChocolArm64.Instructions
             double value2,
             CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMulSub: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMulSub: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPNeg();
 
@@ -2201,7 +2201,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPMulX(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPMulX: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPMulX: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state);
@@ -2243,7 +2243,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPRecipEstimate(double value, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecipEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRecipEstimate: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value.FPUnpack(out FpType type, out bool sign, out ulong op, state);
 
@@ -2332,7 +2332,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPRecipStepFused(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecipStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRecipStepFused: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPNeg();
 
@@ -2375,7 +2375,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPRecpX(double value, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecpX: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRecpX: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value.FPUnpack(out FpType type, out bool sign, out ulong op, state);
 
@@ -2399,7 +2399,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPRSqrtEstimate(double value, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRSqrtEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRSqrtEstimate: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value.FPUnpack(out FpType type, out bool sign, out ulong op, state);
 
@@ -2464,7 +2464,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPRSqrtStepFused(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRSqrtStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPRSqrtStepFused: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPNeg();
 
@@ -2507,7 +2507,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPSqrt(double value, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPSqrt: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPSqrt: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value = value.FPUnpack(out FpType type, out bool sign, out ulong op, state);
 
@@ -2548,7 +2548,7 @@ namespace ChocolArm64.Instructions
 
         public static double FPSub(double value1, double value2, CpuThreadState state)
         {
-            Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPSub: state.Fpcr = 0x{state.Fpcr:X8}");
+            Debug.WriteLineIf(state.CFpcr != 0, $"SoftFloat64.FPSub: state.Fpcr = 0x{state.CFpcr:X8}");
 
             value1 = value1.FPUnpack(out FpType type1, out bool sign1, out ulong op1, state);
             value2 = value2.FPUnpack(out FpType type2, out bool sign2, out ulong op2, state);
@@ -2777,13 +2777,13 @@ namespace ChocolArm64.Instructions
         {
             int enable = (int)exc + 8;
 
-            if ((state.Fpcr & (1 << enable)) != 0)
+            if ((state.CFpcr & (1 << enable)) != 0)
             {
                 throw new NotImplementedException("Floating-point trap handling.");
             }
             else
             {
-                state.Fpsr |= 1 << (int)exc;
+                state.CFpsr |= 1 << (int)exc;
             }
         }
     }
diff --git a/ChocolArm64/Memory/MemoryManager.cs b/ChocolArm64/Memory/MemoryManager.cs
index 364f6b58a..2347f1eb4 100644
--- a/ChocolArm64/Memory/MemoryManager.cs
+++ b/ChocolArm64/Memory/MemoryManager.cs
@@ -11,7 +11,7 @@ using static ChocolArm64.Memory.MemoryManagement;
 
 namespace ChocolArm64.Memory
 {
-    public unsafe class MemoryManager : IMemory, IDisposable
+    public unsafe class MemoryManager : ARMeilleure.Memory.IMemoryManager
     {
         public const int PageBits = 12;
         public const int PageSize = 1 << PageBits;
@@ -880,7 +880,7 @@ namespace ChocolArm64.Memory
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public void WriteVector128(long position, Vector128<float> value)
+        public void WriteVector128Internal(long position, Vector128<float> value)
         {
             if (Sse.IsSupported && (position & 15) == 0)
             {
@@ -893,6 +893,12 @@ namespace ChocolArm64.Memory
             }
         }
 
+        public void WriteVector128(long position, ARMeilleure.State.V128 value) // V128 overload: stores the value as two consecutive 64-bit writes.
+        {
+            WriteUInt64(position + 0, value.GetUInt64(0)); // 64-bit element 0 is written at position
+            WriteUInt64(position + 8, value.GetUInt64(1)); // 64-bit element 1 is written at position + 8
+        }
+
         public void WriteBytes(long position, byte[] data)
         {
             long endAddr = position + data.Length;
diff --git a/ChocolArm64/Optimizations.cs b/ChocolArm64/Optimizations.cs
index cbb8131f5..24828ebfb 100644
--- a/ChocolArm64/Optimizations.cs
+++ b/ChocolArm64/Optimizations.cs
@@ -1,24 +1,27 @@
 using System.Runtime.Intrinsics.X86;
 
-public static class Optimizations
+namespace ChocolArm64
 {
-    public static bool AssumeStrictAbiCompliance { get; set; }
+    public static class Optimizations // Static switches; the Use*IfAvailable properties feed the internal Use* flags below.
+    {
+        public static bool AssumeStrictAbiCompliance { get; set; } = true; // NOTE(review): initialized to true here; the removed declaration had no initializer (false) — confirm the default change is intended.
 
-    public static bool FastFP { get; set; } = true;
+        public static bool FastFP { get; set; } = true;
 
-    private const bool UseAllSseIfAvailable = true;
+        private const bool UseAllSseIfAvailable = true; // Single constant used as the initial value of every Use*IfAvailable property below.
 
-    public static bool UseSseIfAvailable   { get; set; } = UseAllSseIfAvailable;
-    public static bool UseSse2IfAvailable  { get; set; } = UseAllSseIfAvailable;
-    public static bool UseSse3IfAvailable  { get; set; } = UseAllSseIfAvailable;
-    public static bool UseSsse3IfAvailable { get; set; } = UseAllSseIfAvailable;
-    public static bool UseSse41IfAvailable { get; set; } = UseAllSseIfAvailable;
-    public static bool UseSse42IfAvailable { get; set; } = UseAllSseIfAvailable;
+        public static bool UseSseIfAvailable   { get; set; } = UseAllSseIfAvailable;
+        public static bool UseSse2IfAvailable  { get; set; } = UseAllSseIfAvailable;
+        public static bool UseSse3IfAvailable  { get; set; } = UseAllSseIfAvailable;
+        public static bool UseSsse3IfAvailable { get; set; } = UseAllSseIfAvailable;
+        public static bool UseSse41IfAvailable { get; set; } = UseAllSseIfAvailable;
+        public static bool UseSse42IfAvailable { get; set; } = UseAllSseIfAvailable;
 
-    internal static bool UseSse   => UseSseIfAvailable   && Sse.IsSupported;
-    internal static bool UseSse2  => UseSse2IfAvailable  && Sse2.IsSupported;
-    internal static bool UseSse3  => UseSse3IfAvailable  && Sse3.IsSupported;
-    internal static bool UseSsse3 => UseSsse3IfAvailable && Ssse3.IsSupported;
-    internal static bool UseSse41 => UseSse41IfAvailable && Sse41.IsSupported;
-    internal static bool UseSse42 => UseSse42IfAvailable && Sse42.IsSupported;
+        internal static bool UseSse   => UseSseIfAvailable   && Sse.IsSupported; // Effective flag: the setting AND the intrinsic's IsSupported; same pattern for each line below.
+        internal static bool UseSse2  => UseSse2IfAvailable  && Sse2.IsSupported;
+        internal static bool UseSse3  => UseSse3IfAvailable  && Sse3.IsSupported;
+        internal static bool UseSsse3 => UseSsse3IfAvailable && Ssse3.IsSupported;
+        internal static bool UseSse41 => UseSse41IfAvailable && Sse41.IsSupported;
+        internal static bool UseSse42 => UseSse42IfAvailable && Sse42.IsSupported;
+    }
 }
\ No newline at end of file
diff --git a/ChocolArm64/State/CpuThreadState.cs b/ChocolArm64/State/CpuThreadState.cs
index 424f17258..e4baaefa4 100644
--- a/ChocolArm64/State/CpuThreadState.cs
+++ b/ChocolArm64/State/CpuThreadState.cs
@@ -1,13 +1,14 @@
-using ChocolArm64.Events;
 using ChocolArm64.Translation;
 using System;
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Runtime.Intrinsics;
 
+using static ChocolArm64.Instructions.VectorHelper;
+
 namespace ChocolArm64.State
 {
-    public class CpuThreadState
+    public class CpuThreadState : ARMeilleure.State.IExecutionContext
     {
         private const int MinCountForCheck = 40000;
 
@@ -24,7 +25,7 @@ namespace ChocolArm64.State
                                 V16, V17, V18, V19, V20, V21, V22, V23,
                                 V24, V25, V26, V27, V28, V29, V30, V31;
 
-        public bool Aarch32;
+        public bool IsAarch32 { get; set; }
 
         public bool Thumb;
         public bool BigEndian;
@@ -45,8 +46,20 @@ namespace ChocolArm64.State
         public long TpidrEl0 { get; set; }
         public long Tpidr    { get; set; }
 
-        public int Fpcr { get; set; }
-        public int Fpsr { get; set; }
+        public int CFpcr { get; set; }
+        public int CFpsr { get; set; }
+
+        public ARMeilleure.State.FPCR Fpcr // Typed view over CFpcr; holds no state of its own.
+        {
+            get => (ARMeilleure.State.FPCR)CFpcr; // cast the raw int to the FPCR type
+            set => CFpcr = (int)value;            // store the value back as a raw int
+        }
+
+        public ARMeilleure.State.FPSR Fpsr // Typed view over CFpsr; holds no state of its own.
+        {
+            get => (ARMeilleure.State.FPSR)CFpsr; // cast the raw int to the FPSR type
+            set => CFpsr = (int)value;            // store the value back as a raw int
+        }
 
         public int Psr
         {
@@ -73,10 +86,10 @@ namespace ChocolArm64.State
             }
         }
 
-        public event EventHandler<EventArgs>              Interrupt;
-        public event EventHandler<InstExceptionEventArgs> Break;
-        public event EventHandler<InstExceptionEventArgs> SvcCall;
-        public event EventHandler<InstUndefinedEventArgs> Undefined;
+        public event EventHandler<EventArgs>                                Interrupt;
+        public event EventHandler<ARMeilleure.State.InstExceptionEventArgs> Break;
+        public event EventHandler<ARMeilleure.State.InstExceptionEventArgs> SupervisorCall;
+        public event EventHandler<ARMeilleure.State.InstUndefinedEventArgs> Undefined;
 
         private static Stopwatch _tickCounter;
 
@@ -92,6 +105,8 @@ namespace ChocolArm64.State
         public CpuThreadState()
         {
             ClearExclusiveAddress();
+
+            Running = true;
         }
 
         static CpuThreadState()
@@ -151,29 +166,165 @@ namespace ChocolArm64.State
             }
         }
 
-        internal void RequestInterrupt()
+        public ulong GetX(int index) // Returns the value of the field X<index> for index 0..31; any other index throws.
+        {
+            switch (index) // One case per field: X0..X31 are separate members, so they cannot be indexed directly.
+            {
+                case 0:  return X0;
+                case 1:  return X1;
+                case 2:  return X2;
+                case 3:  return X3;
+                case 4:  return X4;
+                case 5:  return X5;
+                case 6:  return X6;
+                case 7:  return X7;
+                case 8:  return X8;
+                case 9:  return X9;
+                case 10: return X10;
+                case 11: return X11;
+                case 12: return X12;
+                case 13: return X13;
+                case 14: return X14;
+                case 15: return X15;
+                case 16: return X16;
+                case 17: return X17;
+                case 18: return X18;
+                case 19: return X19;
+                case 20: return X20;
+                case 21: return X21;
+                case 22: return X22;
+                case 23: return X23;
+                case 24: return X24;
+                case 25: return X25;
+                case 26: return X26;
+                case 27: return X27;
+                case 28: return X28;
+                case 29: return X29;
+                case 30: return X30;
+                case 31: return X31;
+
+                default: throw new ArgumentOutOfRangeException(nameof(index)); // index < 0 or index > 31
+            }
+        }
+
+        public void SetX(int index, ulong value) // Assigns value to the field X<index> for index 0..31; any other index throws.
+        {
+            switch (index) // Mirror of GetX: one case per field X0..X31.
+            {
+                case 0:  X0  = value; break;
+                case 1:  X1  = value; break;
+                case 2:  X2  = value; break;
+                case 3:  X3  = value; break;
+                case 4:  X4  = value; break;
+                case 5:  X5  = value; break;
+                case 6:  X6  = value; break;
+                case 7:  X7  = value; break;
+                case 8:  X8  = value; break;
+                case 9:  X9  = value; break;
+                case 10: X10 = value; break;
+                case 11: X11 = value; break;
+                case 12: X12 = value; break;
+                case 13: X13 = value; break;
+                case 14: X14 = value; break;
+                case 15: X15 = value; break;
+                case 16: X16 = value; break;
+                case 17: X17 = value; break;
+                case 18: X18 = value; break;
+                case 19: X19 = value; break;
+                case 20: X20 = value; break;
+                case 21: X21 = value; break;
+                case 22: X22 = value; break;
+                case 23: X23 = value; break;
+                case 24: X24 = value; break;
+                case 25: X25 = value; break;
+                case 26: X26 = value; break;
+                case 27: X27 = value; break;
+                case 28: X28 = value; break;
+                case 29: X29 = value; break;
+                case 30: X30 = value; break;
+                case 31: X31 = value; break;
+
+                default: throw new ArgumentOutOfRangeException(nameof(index)); // index < 0 or index > 31
+            }
+        }
+
+        public ARMeilleure.State.V128 GetV(int index) // Returns vector register V<index> (0-31) as an ARMeilleure.State.V128 value.
+        {
+            switch (index) // One case per register member V0..V31.
+            { // Each case reads elements 0 and 1 with size argument 3 (presumably two 64-bit lanes - confirm against VectorExtractIntZx).
+                case 0:  return new ARMeilleure.State.V128(VectorExtractIntZx(V0,  0, 3), VectorExtractIntZx(V0,  1, 3));
+                case 1:  return new ARMeilleure.State.V128(VectorExtractIntZx(V1,  0, 3), VectorExtractIntZx(V1,  1, 3));
+                case 2:  return new ARMeilleure.State.V128(VectorExtractIntZx(V2,  0, 3), VectorExtractIntZx(V2,  1, 3));
+                case 3:  return new ARMeilleure.State.V128(VectorExtractIntZx(V3,  0, 3), VectorExtractIntZx(V3,  1, 3));
+                case 4:  return new ARMeilleure.State.V128(VectorExtractIntZx(V4,  0, 3), VectorExtractIntZx(V4,  1, 3));
+                case 5:  return new ARMeilleure.State.V128(VectorExtractIntZx(V5,  0, 3), VectorExtractIntZx(V5,  1, 3));
+                case 6:  return new ARMeilleure.State.V128(VectorExtractIntZx(V6,  0, 3), VectorExtractIntZx(V6,  1, 3));
+                case 7:  return new ARMeilleure.State.V128(VectorExtractIntZx(V7,  0, 3), VectorExtractIntZx(V7,  1, 3));
+                case 8:  return new ARMeilleure.State.V128(VectorExtractIntZx(V8,  0, 3), VectorExtractIntZx(V8,  1, 3));
+                case 9:  return new ARMeilleure.State.V128(VectorExtractIntZx(V9,  0, 3), VectorExtractIntZx(V9,  1, 3));
+                case 10: return new ARMeilleure.State.V128(VectorExtractIntZx(V10, 0, 3), VectorExtractIntZx(V10, 1, 3));
+                case 11: return new ARMeilleure.State.V128(VectorExtractIntZx(V11, 0, 3), VectorExtractIntZx(V11, 1, 3));
+                case 12: return new ARMeilleure.State.V128(VectorExtractIntZx(V12, 0, 3), VectorExtractIntZx(V12, 1, 3));
+                case 13: return new ARMeilleure.State.V128(VectorExtractIntZx(V13, 0, 3), VectorExtractIntZx(V13, 1, 3));
+                case 14: return new ARMeilleure.State.V128(VectorExtractIntZx(V14, 0, 3), VectorExtractIntZx(V14, 1, 3));
+                case 15: return new ARMeilleure.State.V128(VectorExtractIntZx(V15, 0, 3), VectorExtractIntZx(V15, 1, 3));
+                case 16: return new ARMeilleure.State.V128(VectorExtractIntZx(V16, 0, 3), VectorExtractIntZx(V16, 1, 3));
+                case 17: return new ARMeilleure.State.V128(VectorExtractIntZx(V17, 0, 3), VectorExtractIntZx(V17, 1, 3));
+                case 18: return new ARMeilleure.State.V128(VectorExtractIntZx(V18, 0, 3), VectorExtractIntZx(V18, 1, 3));
+                case 19: return new ARMeilleure.State.V128(VectorExtractIntZx(V19, 0, 3), VectorExtractIntZx(V19, 1, 3));
+                case 20: return new ARMeilleure.State.V128(VectorExtractIntZx(V20, 0, 3), VectorExtractIntZx(V20, 1, 3));
+                case 21: return new ARMeilleure.State.V128(VectorExtractIntZx(V21, 0, 3), VectorExtractIntZx(V21, 1, 3));
+                case 22: return new ARMeilleure.State.V128(VectorExtractIntZx(V22, 0, 3), VectorExtractIntZx(V22, 1, 3));
+                case 23: return new ARMeilleure.State.V128(VectorExtractIntZx(V23, 0, 3), VectorExtractIntZx(V23, 1, 3));
+                case 24: return new ARMeilleure.State.V128(VectorExtractIntZx(V24, 0, 3), VectorExtractIntZx(V24, 1, 3));
+                case 25: return new ARMeilleure.State.V128(VectorExtractIntZx(V25, 0, 3), VectorExtractIntZx(V25, 1, 3));
+                case 26: return new ARMeilleure.State.V128(VectorExtractIntZx(V26, 0, 3), VectorExtractIntZx(V26, 1, 3));
+                case 27: return new ARMeilleure.State.V128(VectorExtractIntZx(V27, 0, 3), VectorExtractIntZx(V27, 1, 3));
+                case 28: return new ARMeilleure.State.V128(VectorExtractIntZx(V28, 0, 3), VectorExtractIntZx(V28, 1, 3));
+                case 29: return new ARMeilleure.State.V128(VectorExtractIntZx(V29, 0, 3), VectorExtractIntZx(V29, 1, 3));
+                case 30: return new ARMeilleure.State.V128(VectorExtractIntZx(V30, 0, 3), VectorExtractIntZx(V30, 1, 3));
+                case 31: return new ARMeilleure.State.V128(VectorExtractIntZx(V31, 0, 3), VectorExtractIntZx(V31, 1, 3));
+
+                default: throw new ArgumentOutOfRangeException(nameof(index)); // Any index outside 0-31 is rejected.
+            }
+        }
+
+        public bool GetPstateFlag(ARMeilleure.State.PState flag) // Returns the bool member of this state that corresponds to the given ARMeilleure PState flag.
+        {
+            switch (flag)
+            {
+                case ARMeilleure.State.PState.NFlag: return Negative;
+                case ARMeilleure.State.PState.ZFlag: return Zero;
+                case ARMeilleure.State.PState.CFlag: return Carry;
+                case ARMeilleure.State.PState.VFlag: return Overflow;
+
+                default: throw new ArgumentOutOfRangeException(nameof(flag)); // Only NFlag, ZFlag, CFlag and VFlag are mapped; any other value throws.
+            }
+        }
+
+        public void RequestInterrupt() // Sets the _interrupted flag on this thread state; the code that consumes the flag is outside this hunk.
         {
             _interrupted = true;
         }
 
         internal void OnBreak(long position, int imm)
         {
-            Break?.Invoke(this, new InstExceptionEventArgs(position, imm));
+            Break?.Invoke(this, new ARMeilleure.State.InstExceptionEventArgs((ulong)position, imm)); // Raises Break if it has subscribers; position is cast to ulong for the ARMeilleure event args.
         }
 
         internal void OnSvcCall(long position, int imm)
         {
-            SvcCall?.Invoke(this, new InstExceptionEventArgs(position, imm));
+            SupervisorCall?.Invoke(this, new ARMeilleure.State.InstExceptionEventArgs((ulong)position, imm)); // Raises SupervisorCall if it has subscribers; position is cast to ulong for the ARMeilleure event args.
         }
 
         internal void OnUndefined(long position, int rawOpCode)
         {
-            Undefined?.Invoke(this, new InstUndefinedEventArgs(position, rawOpCode));
+            Undefined?.Invoke(this, new ARMeilleure.State.InstUndefinedEventArgs((ulong)position, rawOpCode)); // Raises Undefined if it has subscribers, passing the position (as ulong) and the raw opcode.
         }
 
         internal ExecutionMode GetExecutionMode()
         {
-            if (!Aarch32)
+            if (!IsAarch32)
             {
                 return ExecutionMode.Aarch64;
             }
@@ -185,17 +336,19 @@ namespace ChocolArm64.State
 
         internal bool GetFpcrFlag(Fpcr flag)
         {
-            return (Fpcr & (1 << (int)flag)) != 0;
+            return (CFpcr & (1 << (int)flag)) != 0; // True when bit number (int)flag is set in CFpcr.
         }
 
         internal void SetFpsrFlag(Fpsr flag)
         {
-            Fpsr |= 1 << (int)flag;
+            CFpsr |= 1 << (int)flag; // Sets bit number (int)flag in CFpsr; the other bits are left as they were.
         }
 
         internal RoundMode FPRoundingMode()
         {
-            return (RoundMode)((Fpcr >> (int)State.Fpcr.RMode) & 3);
+            return (RoundMode)((CFpcr >> (int)State.Fpcr.RMode) & 3); // Shifts CFpcr right by the Fpcr.RMode bit index and keeps the low two bits as the RoundMode.
         }
+
+        public void Dispose() { } // No-op; presumably present only to satisfy a disposable contract on the implemented interface - TODO confirm.
     }
 }
diff --git a/ChocolArm64/Translation/Translator.cs b/ChocolArm64/Translation/Translator.cs
index 0803df09b..ab8f474a4 100644
--- a/ChocolArm64/Translation/Translator.cs
+++ b/ChocolArm64/Translation/Translator.cs
@@ -9,7 +9,7 @@ using System.Threading;
 
 namespace ChocolArm64.Translation
 {
-    public class Translator
+    public class Translator : ARMeilleure.Translation.ITranslator
     {
         private MemoryManager _memory;
 
@@ -38,24 +38,18 @@ namespace ChocolArm64.Translation
             _queue = new TranslatorQueue();
         }
 
-        internal void ExecuteSubroutine(CpuThread thread, long position)
+        public void Execute(ARMeilleure.State.IExecutionContext ctx, ulong address)
         {
+            CpuThreadState state = (CpuThreadState)ctx;
+
+            long position = (long)address;
+
             if (Interlocked.Increment(ref _threadCount) == 1)
             {
                 _backgroundTranslator = new Thread(TranslateQueuedSubs);
                 _backgroundTranslator.Start();
             }
 
-            ExecuteSubroutine(thread.ThreadState, position);
-
-            if (Interlocked.Decrement(ref _threadCount) == 0)
-            {
-                _queue.ForceSignal();
-            }
-        }
-
-        private void ExecuteSubroutine(CpuThreadState state, long position)
-        {
             state.CurrentTranslator = this;
 
             do
@@ -75,6 +69,11 @@ namespace ChocolArm64.Translation
             while (position != 0 && state.Running);
 
             state.CurrentTranslator = null;
+
+            if (Interlocked.Decrement(ref _threadCount) == 0)
+            {
+                _queue.ForceSignal();
+            }
         }
 
         internal ArmSubroutine GetOrTranslateSubroutine(CpuThreadState state, long position, CallType cs)
diff --git a/Ryujinx.Audio/Ryujinx.Audio.csproj b/Ryujinx.Audio/Ryujinx.Audio.csproj
index a6a34f40f..e25066eee 100644
--- a/Ryujinx.Audio/Ryujinx.Audio.csproj
+++ b/Ryujinx.Audio/Ryujinx.Audio.csproj
@@ -2,7 +2,7 @@
 
   <PropertyGroup>
     <TargetFramework>netcoreapp2.1</TargetFramework>
-    <RuntimeIdentifiers>win10-x64;osx-x64;linux-x64</RuntimeIdentifiers>
+    <RuntimeIdentifiers>win-x64;osx-x64;linux-x64</RuntimeIdentifiers>
     <Configurations>Debug;Release;Profile Debug;Profile Release</Configurations>
   </PropertyGroup>
 
diff --git a/Ryujinx.Common/Ryujinx.Common.csproj b/Ryujinx.Common/Ryujinx.Common.csproj
index cf078db85..86c6c570d 100644
--- a/Ryujinx.Common/Ryujinx.Common.csproj
+++ b/Ryujinx.Common/Ryujinx.Common.csproj
@@ -2,7 +2,7 @@
 
   <PropertyGroup>
     <TargetFramework>netcoreapp2.1</TargetFramework>
-    <RuntimeIdentifiers>win10-x64;osx-x64;linux-x64</RuntimeIdentifiers>
+    <RuntimeIdentifiers>win-x64;osx-x64;linux-x64</RuntimeIdentifiers>
     <Configurations>Debug;Release;Profile Debug;Profile Release</Configurations>
   </PropertyGroup>
 
@@ -25,7 +25,7 @@
     <DefineConstants>TRACE;USE_PROFILING</DefineConstants>
     <Optimize>true</Optimize>
   </PropertyGroup>
-  
+
   <ItemGroup>
     <PackageReference Include="Utf8Json" Version="1.3.7" />
   </ItemGroup>
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs b/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs
index 2e78cf142..62dae00b5 100644
--- a/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs
+++ b/Ryujinx.Graphics/Graphics3d/Texture/ImageUtils.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using OpenTK.Graphics.OpenGL;
 using Ryujinx.Graphics.Gal;
 using Ryujinx.Graphics.Memory;
@@ -229,7 +229,7 @@ namespace Ryujinx.Graphics.Texture
 
         public static byte[] ReadTexture(IMemory memory, GalImage image, long position)
         {
-            MemoryManager cpuMemory;
+            IMemoryManager cpuMemory;
 
             if (memory is NvGpuVmm vmm)
             {
@@ -237,7 +237,7 @@ namespace Ryujinx.Graphics.Texture
             }
             else
             {
-                cpuMemory = (MemoryManager)memory;
+                cpuMemory = (IMemoryManager)memory;
             }
 
             ISwizzle swizzle = TextureHelper.GetSwizzle(image);
@@ -251,7 +251,6 @@ namespace Ryujinx.Graphics.Texture
             // Note: Each row of the texture needs to be aligned to 4 bytes.
             int pitch = (width * bytesPerPixel + 3) & ~3;
 
-
             int dataLayerSize = height * pitch * depth;
             byte[] data = new byte[dataLayerSize * image.LayerCount];
 
diff --git a/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs b/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs
index 1de81008e..22b803db3 100644
--- a/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs
+++ b/Ryujinx.Graphics/Graphics3d/Texture/TextureHelper.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Common;
 using Ryujinx.Graphics.Gal;
 using Ryujinx.Graphics.Memory;
@@ -38,7 +38,7 @@ namespace Ryujinx.Graphics.Texture
             }
         }
 
-        public static (MemoryManager Memory, long Position) GetMemoryAndPosition(
+        public static (IMemoryManager Memory, long Position) GetMemoryAndPosition(
             IMemory memory,
             long    position)
         {
@@ -47,7 +47,7 @@ namespace Ryujinx.Graphics.Texture
                 return (vmm.Memory, vmm.GetPhysicalAddress(position));
             }
 
-            return ((MemoryManager)memory, position);
+            return ((IMemoryManager)memory, position);
         }
     }
 }
diff --git a/Ryujinx.Graphics/Memory/NvGpuVmm.cs b/Ryujinx.Graphics/Memory/NvGpuVmm.cs
index fea99587d..d8ccd6c74 100644
--- a/Ryujinx.Graphics/Memory/NvGpuVmm.cs
+++ b/Ryujinx.Graphics/Memory/NvGpuVmm.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Graphics.Gal;
 using System;
 
@@ -23,7 +23,7 @@ namespace Ryujinx.Graphics.Memory
         private const int PtLvl0Bit = PtPageBits + PtLvl1Bits;
         private const int PtLvl1Bit = PtPageBits;
 
-        public MemoryManager Memory { get; private set; }
+        public IMemoryManager Memory { get; private set; }
 
         private NvGpuVmmCache _cache;
 
@@ -32,7 +32,7 @@ namespace Ryujinx.Graphics.Memory
 
         private long[][] _pageTable;
 
-        public NvGpuVmm(MemoryManager memory)
+        public NvGpuVmm(IMemoryManager memory)
         {
             Memory = memory;
 
diff --git a/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs b/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs
index ab5ea288c..37ead4e0a 100644
--- a/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs
+++ b/Ryujinx.Graphics/Memory/NvGpuVmmCache.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using System.Collections.Concurrent;
 
 namespace Ryujinx.Graphics.Memory
@@ -12,9 +12,9 @@ namespace Ryujinx.Graphics.Memory
 
         private ConcurrentDictionary<long, int>[] _cachedPages;
 
-        private MemoryManager _memory;
+        private IMemoryManager _memory;
 
-        public NvGpuVmmCache(MemoryManager memory)
+        public NvGpuVmmCache(IMemoryManager memory)
         {
             _memory = memory;
 
diff --git a/Ryujinx.Graphics/Ryujinx.Graphics.csproj b/Ryujinx.Graphics/Ryujinx.Graphics.csproj
index 740008955..e2bf16930 100644
--- a/Ryujinx.Graphics/Ryujinx.Graphics.csproj
+++ b/Ryujinx.Graphics/Ryujinx.Graphics.csproj
@@ -2,7 +2,7 @@
 
   <PropertyGroup>
     <TargetFramework>netcoreapp2.1</TargetFramework>
-    <RuntimeIdentifiers>win10-x64;osx-x64;linux-x64</RuntimeIdentifiers>
+    <RuntimeIdentifiers>win-x64;osx-x64;linux-x64</RuntimeIdentifiers>
     <Configurations>Debug;Release;Profile Debug;Profile Release</Configurations>
   </PropertyGroup>
 
@@ -32,8 +32,9 @@
   </ItemGroup>
 
   <ItemGroup>
-    <ProjectReference Include="..\ChocolArm64\ChocolArm64.csproj" />
     <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
+    <ProjectReference Include="..\ARMeilleure\ARMeilleure.csproj" />
+    <ProjectReference Include="..\Ryujinx.Profiler\Ryujinx.Profiler.csproj" />
   </ItemGroup>
 
 </Project>
diff --git a/Ryujinx.Graphics/VDec/VideoDecoder.cs b/Ryujinx.Graphics/VDec/VideoDecoder.cs
index 3ebb93f42..9bf60c31b 100644
--- a/Ryujinx.Graphics/VDec/VideoDecoder.cs
+++ b/Ryujinx.Graphics/VDec/VideoDecoder.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Graphics.Gal;
 using Ryujinx.Graphics.Memory;
 using Ryujinx.Graphics.Texture;
diff --git a/Ryujinx.HLE/DeviceMemory.cs b/Ryujinx.HLE/DeviceMemory.cs
index 3553a6e71..0ead17473 100644
--- a/Ryujinx.HLE/DeviceMemory.cs
+++ b/Ryujinx.HLE/DeviceMemory.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using System;
 using System.Runtime.InteropServices;
 
diff --git a/Ryujinx.HLE/Exceptions/UndefinedInstructionException.cs b/Ryujinx.HLE/Exceptions/UndefinedInstructionException.cs
index 84bb1fc59..dfbd6c272 100644
--- a/Ryujinx.HLE/Exceptions/UndefinedInstructionException.cs
+++ b/Ryujinx.HLE/Exceptions/UndefinedInstructionException.cs
@@ -8,6 +8,6 @@ namespace Ryujinx.HLE.Exceptions
 
         public UndefinedInstructionException() : base() { }
 
-        public UndefinedInstructionException(long position, int opCode) : base(string.Format(ExMsg, position, opCode)) { }
+        public UndefinedInstructionException(ulong address, int opCode) : base(string.Format(ExMsg, address, opCode)) { } // Builds the exception message by formatting ExMsg with the given address and opcode.
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.HLE/HOS/Homebrew.cs b/Ryujinx.HLE/HOS/Homebrew.cs
index b11a46404..8e54f82c1 100644
--- a/Ryujinx.HLE/HOS/Homebrew.cs
+++ b/Ryujinx.HLE/HOS/Homebrew.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using System.Text;
 
 namespace Ryujinx.HLE.HOS
@@ -8,7 +8,7 @@ namespace Ryujinx.HLE.HOS
         public const string TemporaryNroSuffix = ".ryu_tmp.nro";
 
         // http://switchbrew.org/index.php?title=Homebrew_ABI
-        public static void WriteHbAbiData(MemoryManager memory, long position, int mainThreadHandle, string switchPath)
+        public static void WriteHbAbiData(IMemoryManager memory, long position, int mainThreadHandle, string switchPath)
         {
             // MainThreadHandle.
             WriteConfigEntry(memory, ref position, 1, 0, mainThreadHandle);
@@ -31,7 +31,7 @@ namespace Ryujinx.HLE.HOS
         }
 
         private static void WriteConfigEntry(
-            MemoryManager  memory,
+            IMemoryManager memory,
             ref long       position,
             int            key,
             int            flags  = 0,
@@ -46,7 +46,7 @@ namespace Ryujinx.HLE.HOS
             position += 0x18;
         }
 
-        public static string ReadHbAbiNextLoadPath(MemoryManager memory, long position)
+        public static string ReadHbAbiNextLoadPath(IMemoryManager memory, long position)
         {
             string fileName = null;
 
diff --git a/Ryujinx.HLE/HOS/Horizon.cs b/Ryujinx.HLE/HOS/Horizon.cs
index f8bb345f2..5873223ef 100644
--- a/Ryujinx.HLE/HOS/Horizon.cs
+++ b/Ryujinx.HLE/HOS/Horizon.cs
@@ -110,6 +110,8 @@ namespace Ryujinx.HLE.HOS
 
         public int GlobalAccessLogMode { get; set; }
 
+        public bool UseLegacyJit { get; set; }
+
         internal long HidBaseAddress { get; private set; }
 
         public Horizon(Switch device)
diff --git a/Ryujinx.HLE/HOS/Ipc/IpcHandler.cs b/Ryujinx.HLE/HOS/Ipc/IpcHandler.cs
index e940d774c..50ab3d100 100644
--- a/Ryujinx.HLE/HOS/Ipc/IpcHandler.cs
+++ b/Ryujinx.HLE/HOS/Ipc/IpcHandler.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.HLE.HOS.Kernel.Common;
 using Ryujinx.HLE.HOS.Kernel.Ipc;
 using Ryujinx.HLE.HOS.Kernel.Process;
@@ -13,7 +13,7 @@ namespace Ryujinx.HLE.HOS.Ipc
         public static KernelResult IpcCall(
             Switch         device,
             KProcess       process,
-            MemoryManager  memory,
+            IMemoryManager memory,
             KThread        thread,
             KClientSession session,
             IpcMessage     request,
diff --git a/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs b/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs
index 0fcb31483..62330d6ba 100644
--- a/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Common/KernelTransfer.cs
@@ -1,5 +1,5 @@
 using Ryujinx.HLE.HOS.Kernel.Process;
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 
 namespace Ryujinx.HLE.HOS.Kernel.Common
 {
diff --git a/Ryujinx.HLE/HOS/Kernel/Memory/KMemoryManager.cs b/Ryujinx.HLE/HOS/Kernel/Memory/KMemoryManager.cs
index 448ae54c0..fd80b3b9e 100644
--- a/Ryujinx.HLE/HOS/Kernel/Memory/KMemoryManager.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Memory/KMemoryManager.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Common;
 using Ryujinx.HLE.HOS.Kernel.Common;
 using Ryujinx.HLE.HOS.Kernel.Process;
@@ -29,7 +29,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory
 
         private LinkedList<KMemoryBlock> _blocks;
 
-        private MemoryManager _cpuMemory;
+        private IMemoryManager _cpuMemory;
 
         private Horizon _system;
 
@@ -72,7 +72,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory
 
         private MersenneTwister _randomNumberGenerator;
 
-        public KMemoryManager(Horizon system, MemoryManager cpuMemory)
+        public KMemoryManager(Horizon system, IMemoryManager cpuMemory)
         {
             _system    = system;
             _cpuMemory = cpuMemory;
diff --git a/Ryujinx.HLE/HOS/Kernel/Process/HleProcessDebugger.cs b/Ryujinx.HLE/HOS/Kernel/Process/HleProcessDebugger.cs
index 223bf5dae..e2ca44b59 100644
--- a/Ryujinx.HLE/HOS/Kernel/Process/HleProcessDebugger.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Process/HleProcessDebugger.cs
@@ -1,5 +1,5 @@
-using ChocolArm64.Memory;
-using ChocolArm64.State;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
 using Ryujinx.HLE.HOS.Diagnostics.Demangler;
 using Ryujinx.HLE.HOS.Kernel.Memory;
 using Ryujinx.HLE.Loaders.Elf;
@@ -40,7 +40,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
             _images = new List<Image>();
         }
 
-        public string GetGuestStackTrace(CpuThreadState threadState)
+        public string GetGuestStackTrace(IExecutionContext context)
         {
             EnsureLoaded();
 
@@ -74,7 +74,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
             }
 
             // TODO: ARM32.
-            long framePointer = (long)threadState.X29;
+            long framePointer = (long)context.GetX(29);
 
             trace.AppendLine($"Process: {_owner.Name}, PID: {_owner.Pid}");
 
@@ -218,7 +218,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
             }
         }
 
-        private void LoadMod0Symbols(MemoryManager memory, long textOffset)
+        private void LoadMod0Symbols(IMemoryManager memory, long textOffset)
         {
             long mod0Offset = textOffset + memory.ReadUInt32(textOffset + 4);
 
@@ -288,7 +288,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
             }
         }
 
-        private ElfSymbol GetSymbol(MemoryManager memory, long address, long strTblAddr)
+        private ElfSymbol GetSymbol(IMemoryManager memory, long address, long strTblAddr)
         {
             int  nameIndex = memory.ReadInt32(address + 0);
             int  info      = memory.ReadByte (address + 4);
diff --git a/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs b/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs
index 1b5a67722..beb376f64 100644
--- a/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs
@@ -1,9 +1,7 @@
-using ChocolArm64;
-using ChocolArm64.Events;
-using ChocolArm64.Memory;
-using ChocolArm64.Translation;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
 using Ryujinx.Common;
-using Ryujinx.Common.Logging;
 using Ryujinx.HLE.Exceptions;
 using Ryujinx.HLE.HOS.Kernel.Common;
 using Ryujinx.HLE.HOS.Kernel.Memory;
@@ -80,9 +78,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
 
         public bool IsPaused { get; private set; }
 
-        public MemoryManager CpuMemory { get; private set; }
+        public IMemoryManager CpuMemory { get; private set; }
 
-        public Translator Translator { get; private set; }
+        public ITranslator Translator { get; private set; }
 
         private SvcHandler _svcHandler;
 
@@ -793,11 +791,11 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
             }
         }
 
-        public void SubscribeThreadEventHandlers(CpuThread context)
+        public void SubscribeThreadEventHandlers(IExecutionContext context) // Attaches this process' handlers to the Interrupt, SupervisorCall and Undefined events of the given execution context.
         {
-            context.ThreadState.Interrupt += InterruptHandler;
-            context.ThreadState.SvcCall   += _svcHandler.SvcCall;
-            context.ThreadState.Undefined += UndefinedInstructionHandler;
+            context.Interrupt      += InterruptHandler;
+            context.SupervisorCall += _svcHandler.SvcCall; // Supervisor calls are routed to the SvcHandler instance held in _svcHandler.
+            context.Undefined      += UndefinedInstructionHandler;
         }
 
         private void InterruptHandler(object sender, EventArgs e)
@@ -1001,9 +999,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
             {
                 foreach (KThread thread in _threads)
                 {
-                    thread.Context.StopExecution();
+                    thread.Context.Running = false;
 
-                    System.Scheduler.CoreManager.Set(thread.Context.Work);
+                    System.Scheduler.CoreManager.Set(thread.HostThread);
                 }
             }
         }
@@ -1024,13 +1022,20 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
 
             bool useFlatPageTable = memRegion == MemoryRegion.Application;
 
-            CpuMemory = new MemoryManager(_system.Device.Memory.RamPointer, addrSpaceBits, useFlatPageTable);
+            if (_system.UseLegacyJit)
+            {
+                CpuMemory = new ChocolArm64.Memory.MemoryManager(_system.Device.Memory.RamPointer, addrSpaceBits, useFlatPageTable);
+
+                Translator = new ChocolArm64.Translation.Translator((ChocolArm64.Memory.MemoryManager)CpuMemory);
+            }
+            else
+            {
+                CpuMemory = new MemoryManager(_system.Device.Memory.RamPointer, addrSpaceBits, useFlatPageTable);
+
+                Translator = new Translator((MemoryManager)CpuMemory);
+            }
 
             MemoryManager = new KMemoryManager(_system, CpuMemory);
-
-            Translator = new Translator(CpuMemory);
-
-            Translator.CpuTrace += CpuTraceHandler;
         }
 
         public void PrintCurrentThreadStackTrace()
@@ -1038,14 +1043,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Process
             System.Scheduler.GetCurrentThread().PrintGuestStackTrace();
         }
 
-        private void CpuTraceHandler(object sender, CpuTraceEventArgs e)
-        {
-            Logger.PrintInfo(LogClass.Cpu, $"Executing at 0x{e.Position:X16}.");
-        }
-
         private void UndefinedInstructionHandler(object sender, InstUndefinedEventArgs e)
         {
-            throw new UndefinedInstructionException(e.Position, e.RawOpCode);
+            throw new UndefinedInstructionException(e.Address, e.OpCode); // Converts the Undefined event into a thrown exception carrying e.Address and e.OpCode.
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs
index cf881a793..7509ae048 100644
--- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs
+++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcHandler.cs
@@ -1,5 +1,4 @@
-using ChocolArm64.Events;
-using ChocolArm64.State;
+using ARMeilleure.State;
 using Ryujinx.HLE.HOS.Kernel.Process;
 using System;
 
@@ -7,9 +6,9 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
 {
     partial class SvcHandler
     {
-        private Switch        _device;
-        private KProcess      _process;
-        private Horizon       _system;
+        private Switch   _device;
+        private KProcess _process;
+        private Horizon  _system;
 
         public SvcHandler(Switch device, KProcess process)
         {
@@ -20,16 +19,16 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
 
         public void SvcCall(object sender, InstExceptionEventArgs e)
         {
-            Action<SvcHandler, CpuThreadState> svcFunc = SvcTable.GetSvcFunc(e.Id);
+            Action<SvcHandler, IExecutionContext> svcFunc = SvcTable.GetSvcFunc(e.Id); // Looks up the handler delegate for SVC id e.Id; a null result is rejected just below.
 
             if (svcFunc == null)
             {
                 throw new NotImplementedException($"SVC 0x{e.Id:X4} is not implemented.");
             }
 
-            CpuThreadState threadState = (CpuThreadState)sender;
+            IExecutionContext context = (IExecutionContext)sender; // Direct cast: a sender of any other type throws InvalidCastException.
 
-            svcFunc(this, threadState);
+            svcFunc(this, context); // Invokes the handler with this SvcHandler and the execution context that raised the event.
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs
index eb7595c0a..7c1c981bf 100644
--- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs
+++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcIpc.cs
@@ -83,7 +83,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
 
         public KernelResult SendSyncRequest64(int handle)
         {
-            return SendSyncRequest((ulong)_system.Scheduler.GetCurrentThread().Context.ThreadState.Tpidr, 0x100, handle);
+            return SendSyncRequest((ulong)_system.Scheduler.GetCurrentThread().Context.Tpidr, 0x100, handle); // Passes the current thread's Tpidr and 0x100 as the first two arguments (presumably message pointer and size - confirm against SendSyncRequest).
         }
 
         public KernelResult SendSyncRequestWithUserBuffer64(ulong messagePtr, ulong size, int handle)
diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs
index 5f971131c..094e1935f 100644
--- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs
+++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcSystem.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Common;
 using Ryujinx.Common.Logging;
 using Ryujinx.HLE.Exceptions;
@@ -138,7 +138,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
 
         public ulong GetSystemTick64()
         {
-            return _system.Scheduler.GetCurrentThread().Context.ThreadState.CntpctEl0;
+            return _system.Scheduler.GetCurrentThread().Context.CntpctEl0; // Returns CntpctEl0 read from the current thread's execution context.
         }
 
         public KernelResult GetProcessId64(int handle, out long pid)
diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs
index 23934649f..c1a31da9b 100644
--- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs
+++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcTable.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.State;
+using ARMeilleure.State;
 using Ryujinx.Common.Logging;
 using Ryujinx.HLE.HOS.Kernel.Common;
 using System;
@@ -14,7 +14,7 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
 
         private static Dictionary<int, string> _svcFuncs64;
 
-        private static Action<SvcHandler, CpuThreadState>[] _svcTable64;
+        private static Action<SvcHandler, IExecutionContext>[] _svcTable64;
 
         static SvcTable()
         {
@@ -77,10 +77,10 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
                 { 0x78, nameof(SvcHandler.UnmapProcessCodeMemory64)        }
             };
 
-            _svcTable64 = new Action<SvcHandler, CpuThreadState>[0x80];
+            _svcTable64 = new Action<SvcHandler, IExecutionContext>[0x80];
         }
 
-        public static Action<SvcHandler, CpuThreadState> GetSvcFunc(int svcId)
+        public static Action<SvcHandler, IExecutionContext> GetSvcFunc(int svcId)
         {
             if (_svcTable64[svcId] != null)
             {
@@ -95,9 +95,9 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
             return null;
         }
 
-        private static Action<SvcHandler, CpuThreadState> GenerateMethod(string svcName)
+        private static Action<SvcHandler, IExecutionContext> GenerateMethod(string svcName)
         {
-            Type[] argTypes = new Type[] { typeof(SvcHandler), typeof(CpuThreadState) };
+            Type[] argTypes = new Type[] { typeof(SvcHandler), typeof(IExecutionContext) };
 
             DynamicMethod method = new DynamicMethod(svcName, null, argTypes);
 
@@ -183,7 +183,11 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
                 generator.Emit(OpCodes.Conv_I);
 
                 generator.Emit(OpCodes.Ldarg_1);
-                generator.Emit(OpCodes.Ldfld, GetStateFieldX(byRefArgsCount + index));
+                generator.Emit(OpCodes.Ldc_I4, byRefArgsCount + index);
+
+                MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.GetX));
+
+                generator.Emit(OpCodes.Call, info);
 
                 generator.Emit(OpCodes.Box, typeof(ulong));
 
@@ -227,7 +231,11 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
                 else
                 {
                     generator.Emit(OpCodes.Ldarg_1);
-                    generator.Emit(OpCodes.Ldfld, GetStateFieldX(byRefArgsCount + index));
+                    generator.Emit(OpCodes.Ldc_I4, byRefArgsCount + index);
+
+                    MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.GetX));
+
+                    generator.Emit(OpCodes.Call, info);
 
                     ConvertToArgType(argType);
                 }
@@ -258,51 +266,44 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
 
                 generator.Emit(OpCodes.Stloc, tempLocal);
                 generator.Emit(OpCodes.Ldarg_1);
+                generator.Emit(OpCodes.Ldc_I4, outRegIndex++);
                 generator.Emit(OpCodes.Ldloc, tempLocal);
 
                 ConvertToFieldType(retType);
 
-                generator.Emit(OpCodes.Stfld, GetStateFieldX(outRegIndex++));
+                MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.SetX));
+
+                generator.Emit(OpCodes.Call, info);
             }
 
             for (int index = 0; index < locals.Count; index++)
             {
                 generator.Emit(OpCodes.Ldarg_1);
+                generator.Emit(OpCodes.Ldc_I4, outRegIndex++);
                 generator.Emit(OpCodes.Ldloc, locals[index]);
 
                 ConvertToFieldType(locals[index].LocalType);
 
-                generator.Emit(OpCodes.Stfld, GetStateFieldX(outRegIndex++));
+                MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.SetX));
+
+                generator.Emit(OpCodes.Call, info);
             }
 
             // Zero out the remaining unused registers.
             while (outRegIndex < SvcFuncMaxArguments)
             {
                 generator.Emit(OpCodes.Ldarg_1);
+                generator.Emit(OpCodes.Ldc_I4, outRegIndex++);
                 generator.Emit(OpCodes.Ldc_I8, 0L);
-                generator.Emit(OpCodes.Stfld, GetStateFieldX(outRegIndex++));
+
+                MethodInfo info = typeof(IExecutionContext).GetMethod(nameof(IExecutionContext.SetX));
+
+                generator.Emit(OpCodes.Call, info);
             }
 
             generator.Emit(OpCodes.Ret);
 
-            return (Action<SvcHandler, CpuThreadState>)method.CreateDelegate(typeof(Action<SvcHandler, CpuThreadState>));
-        }
-
-        private static FieldInfo GetStateFieldX(int index)
-        {
-            switch (index)
-            {
-                case 0: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X0));
-                case 1: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X1));
-                case 2: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X2));
-                case 3: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X3));
-                case 4: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X4));
-                case 5: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X5));
-                case 6: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X6));
-                case 7: return typeof(CpuThreadState).GetField(nameof(CpuThreadState.X7));
-            }
-
-            throw new ArgumentOutOfRangeException(nameof(index));
+            return (Action<SvcHandler, IExecutionContext>)method.CreateDelegate(typeof(Action<SvcHandler, IExecutionContext>));
         }
 
         private static void CheckIfTypeIsSupported(Type type, string svcName)
diff --git a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs
index e1f018c19..e49da023a 100644
--- a/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs
+++ b/Ryujinx.HLE/HOS/Kernel/SupervisorCall/SvcThread.cs
@@ -1,4 +1,5 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
 using Ryujinx.HLE.HOS.Kernel.Common;
 using Ryujinx.HLE.HOS.Kernel.Process;
 using Ryujinx.HLE.HOS.Kernel.Threading;
@@ -347,83 +348,91 @@ namespace Ryujinx.HLE.HOS.Kernel.SupervisorCall
                 return KernelResult.InvalidThread;
             }
 
-            MemoryManager memory = currentProcess.CpuMemory;
+            IMemoryManager memory = currentProcess.CpuMemory;
 
-            memory.WriteUInt64((long)address + 0x0,  thread.Context.ThreadState.X0);
-            memory.WriteUInt64((long)address + 0x8,  thread.Context.ThreadState.X1);
-            memory.WriteUInt64((long)address + 0x10, thread.Context.ThreadState.X2);
-            memory.WriteUInt64((long)address + 0x18, thread.Context.ThreadState.X3);
-            memory.WriteUInt64((long)address + 0x20, thread.Context.ThreadState.X4);
-            memory.WriteUInt64((long)address + 0x28, thread.Context.ThreadState.X5);
-            memory.WriteUInt64((long)address + 0x30, thread.Context.ThreadState.X6);
-            memory.WriteUInt64((long)address + 0x38, thread.Context.ThreadState.X7);
-            memory.WriteUInt64((long)address + 0x40, thread.Context.ThreadState.X8);
-            memory.WriteUInt64((long)address + 0x48, thread.Context.ThreadState.X9);
-            memory.WriteUInt64((long)address + 0x50, thread.Context.ThreadState.X10);
-            memory.WriteUInt64((long)address + 0x58, thread.Context.ThreadState.X11);
-            memory.WriteUInt64((long)address + 0x60, thread.Context.ThreadState.X12);
-            memory.WriteUInt64((long)address + 0x68, thread.Context.ThreadState.X13);
-            memory.WriteUInt64((long)address + 0x70, thread.Context.ThreadState.X14);
-            memory.WriteUInt64((long)address + 0x78, thread.Context.ThreadState.X15);
-            memory.WriteUInt64((long)address + 0x80, thread.Context.ThreadState.X16);
-            memory.WriteUInt64((long)address + 0x88, thread.Context.ThreadState.X17);
-            memory.WriteUInt64((long)address + 0x90, thread.Context.ThreadState.X18);
-            memory.WriteUInt64((long)address + 0x98, thread.Context.ThreadState.X19);
-            memory.WriteUInt64((long)address + 0xa0, thread.Context.ThreadState.X20);
-            memory.WriteUInt64((long)address + 0xa8, thread.Context.ThreadState.X21);
-            memory.WriteUInt64((long)address + 0xb0, thread.Context.ThreadState.X22);
-            memory.WriteUInt64((long)address + 0xb8, thread.Context.ThreadState.X23);
-            memory.WriteUInt64((long)address + 0xc0, thread.Context.ThreadState.X24);
-            memory.WriteUInt64((long)address + 0xc8, thread.Context.ThreadState.X25);
-            memory.WriteUInt64((long)address + 0xd0, thread.Context.ThreadState.X26);
-            memory.WriteUInt64((long)address + 0xd8, thread.Context.ThreadState.X27);
-            memory.WriteUInt64((long)address + 0xe0, thread.Context.ThreadState.X28);
-            memory.WriteUInt64((long)address + 0xe8, thread.Context.ThreadState.X29);
-            memory.WriteUInt64((long)address + 0xf0, thread.Context.ThreadState.X30);
-            memory.WriteUInt64((long)address + 0xf8, thread.Context.ThreadState.X31);
+            memory.WriteUInt64((long)address + 0x0,  thread.Context.GetX(0));
+            memory.WriteUInt64((long)address + 0x8,  thread.Context.GetX(1));
+            memory.WriteUInt64((long)address + 0x10, thread.Context.GetX(2));
+            memory.WriteUInt64((long)address + 0x18, thread.Context.GetX(3));
+            memory.WriteUInt64((long)address + 0x20, thread.Context.GetX(4));
+            memory.WriteUInt64((long)address + 0x28, thread.Context.GetX(5));
+            memory.WriteUInt64((long)address + 0x30, thread.Context.GetX(6));
+            memory.WriteUInt64((long)address + 0x38, thread.Context.GetX(7));
+            memory.WriteUInt64((long)address + 0x40, thread.Context.GetX(8));
+            memory.WriteUInt64((long)address + 0x48, thread.Context.GetX(9));
+            memory.WriteUInt64((long)address + 0x50, thread.Context.GetX(10));
+            memory.WriteUInt64((long)address + 0x58, thread.Context.GetX(11));
+            memory.WriteUInt64((long)address + 0x60, thread.Context.GetX(12));
+            memory.WriteUInt64((long)address + 0x68, thread.Context.GetX(13));
+            memory.WriteUInt64((long)address + 0x70, thread.Context.GetX(14));
+            memory.WriteUInt64((long)address + 0x78, thread.Context.GetX(15));
+            memory.WriteUInt64((long)address + 0x80, thread.Context.GetX(16));
+            memory.WriteUInt64((long)address + 0x88, thread.Context.GetX(17));
+            memory.WriteUInt64((long)address + 0x90, thread.Context.GetX(18));
+            memory.WriteUInt64((long)address + 0x98, thread.Context.GetX(19));
+            memory.WriteUInt64((long)address + 0xa0, thread.Context.GetX(20));
+            memory.WriteUInt64((long)address + 0xa8, thread.Context.GetX(21));
+            memory.WriteUInt64((long)address + 0xb0, thread.Context.GetX(22));
+            memory.WriteUInt64((long)address + 0xb8, thread.Context.GetX(23));
+            memory.WriteUInt64((long)address + 0xc0, thread.Context.GetX(24));
+            memory.WriteUInt64((long)address + 0xc8, thread.Context.GetX(25));
+            memory.WriteUInt64((long)address + 0xd0, thread.Context.GetX(26));
+            memory.WriteUInt64((long)address + 0xd8, thread.Context.GetX(27));
+            memory.WriteUInt64((long)address + 0xe0, thread.Context.GetX(28));
+            memory.WriteUInt64((long)address + 0xe8, thread.Context.GetX(29));
+            memory.WriteUInt64((long)address + 0xf0, thread.Context.GetX(30));
+            memory.WriteUInt64((long)address + 0xf8, thread.Context.GetX(31));
 
             memory.WriteInt64((long)address + 0x100, thread.LastPc);
 
-            memory.WriteUInt64((long)address + 0x108, (ulong)thread.Context.ThreadState.Psr);
+            memory.WriteUInt64((long)address + 0x108, (ulong)GetPsr(thread.Context));
 
-            memory.WriteVector128((long)address + 0x110, thread.Context.ThreadState.V0);
-            memory.WriteVector128((long)address + 0x120, thread.Context.ThreadState.V1);
-            memory.WriteVector128((long)address + 0x130, thread.Context.ThreadState.V2);
-            memory.WriteVector128((long)address + 0x140, thread.Context.ThreadState.V3);
-            memory.WriteVector128((long)address + 0x150, thread.Context.ThreadState.V4);
-            memory.WriteVector128((long)address + 0x160, thread.Context.ThreadState.V5);
-            memory.WriteVector128((long)address + 0x170, thread.Context.ThreadState.V6);
-            memory.WriteVector128((long)address + 0x180, thread.Context.ThreadState.V7);
-            memory.WriteVector128((long)address + 0x190, thread.Context.ThreadState.V8);
-            memory.WriteVector128((long)address + 0x1a0, thread.Context.ThreadState.V9);
-            memory.WriteVector128((long)address + 0x1b0, thread.Context.ThreadState.V10);
-            memory.WriteVector128((long)address + 0x1c0, thread.Context.ThreadState.V11);
-            memory.WriteVector128((long)address + 0x1d0, thread.Context.ThreadState.V12);
-            memory.WriteVector128((long)address + 0x1e0, thread.Context.ThreadState.V13);
-            memory.WriteVector128((long)address + 0x1f0, thread.Context.ThreadState.V14);
-            memory.WriteVector128((long)address + 0x200, thread.Context.ThreadState.V15);
-            memory.WriteVector128((long)address + 0x210, thread.Context.ThreadState.V16);
-            memory.WriteVector128((long)address + 0x220, thread.Context.ThreadState.V17);
-            memory.WriteVector128((long)address + 0x230, thread.Context.ThreadState.V18);
-            memory.WriteVector128((long)address + 0x240, thread.Context.ThreadState.V19);
-            memory.WriteVector128((long)address + 0x250, thread.Context.ThreadState.V20);
-            memory.WriteVector128((long)address + 0x260, thread.Context.ThreadState.V21);
-            memory.WriteVector128((long)address + 0x270, thread.Context.ThreadState.V22);
-            memory.WriteVector128((long)address + 0x280, thread.Context.ThreadState.V23);
-            memory.WriteVector128((long)address + 0x290, thread.Context.ThreadState.V24);
-            memory.WriteVector128((long)address + 0x2a0, thread.Context.ThreadState.V25);
-            memory.WriteVector128((long)address + 0x2b0, thread.Context.ThreadState.V26);
-            memory.WriteVector128((long)address + 0x2c0, thread.Context.ThreadState.V27);
-            memory.WriteVector128((long)address + 0x2d0, thread.Context.ThreadState.V28);
-            memory.WriteVector128((long)address + 0x2e0, thread.Context.ThreadState.V29);
-            memory.WriteVector128((long)address + 0x2f0, thread.Context.ThreadState.V30);
-            memory.WriteVector128((long)address + 0x300, thread.Context.ThreadState.V31);
+            memory.WriteVector128((long)address + 0x110, thread.Context.GetV(0));
+            memory.WriteVector128((long)address + 0x120, thread.Context.GetV(1));
+            memory.WriteVector128((long)address + 0x130, thread.Context.GetV(2));
+            memory.WriteVector128((long)address + 0x140, thread.Context.GetV(3));
+            memory.WriteVector128((long)address + 0x150, thread.Context.GetV(4));
+            memory.WriteVector128((long)address + 0x160, thread.Context.GetV(5));
+            memory.WriteVector128((long)address + 0x170, thread.Context.GetV(6));
+            memory.WriteVector128((long)address + 0x180, thread.Context.GetV(7));
+            memory.WriteVector128((long)address + 0x190, thread.Context.GetV(8));
+            memory.WriteVector128((long)address + 0x1a0, thread.Context.GetV(9));
+            memory.WriteVector128((long)address + 0x1b0, thread.Context.GetV(10));
+            memory.WriteVector128((long)address + 0x1c0, thread.Context.GetV(11));
+            memory.WriteVector128((long)address + 0x1d0, thread.Context.GetV(12));
+            memory.WriteVector128((long)address + 0x1e0, thread.Context.GetV(13));
+            memory.WriteVector128((long)address + 0x1f0, thread.Context.GetV(14));
+            memory.WriteVector128((long)address + 0x200, thread.Context.GetV(15));
+            memory.WriteVector128((long)address + 0x210, thread.Context.GetV(16));
+            memory.WriteVector128((long)address + 0x220, thread.Context.GetV(17));
+            memory.WriteVector128((long)address + 0x230, thread.Context.GetV(18));
+            memory.WriteVector128((long)address + 0x240, thread.Context.GetV(19));
+            memory.WriteVector128((long)address + 0x250, thread.Context.GetV(20));
+            memory.WriteVector128((long)address + 0x260, thread.Context.GetV(21));
+            memory.WriteVector128((long)address + 0x270, thread.Context.GetV(22));
+            memory.WriteVector128((long)address + 0x280, thread.Context.GetV(23));
+            memory.WriteVector128((long)address + 0x290, thread.Context.GetV(24));
+            memory.WriteVector128((long)address + 0x2a0, thread.Context.GetV(25));
+            memory.WriteVector128((long)address + 0x2b0, thread.Context.GetV(26));
+            memory.WriteVector128((long)address + 0x2c0, thread.Context.GetV(27));
+            memory.WriteVector128((long)address + 0x2d0, thread.Context.GetV(28));
+            memory.WriteVector128((long)address + 0x2e0, thread.Context.GetV(29));
+            memory.WriteVector128((long)address + 0x2f0, thread.Context.GetV(30));
+            memory.WriteVector128((long)address + 0x300, thread.Context.GetV(31));
 
-            memory.WriteInt32((long)address + 0x310, thread.Context.ThreadState.Fpcr);
-            memory.WriteInt32((long)address + 0x314, thread.Context.ThreadState.Fpsr);
-            memory.WriteInt64((long)address + 0x318, thread.Context.ThreadState.Tpidr);
+            memory.WriteInt32((long)address + 0x310, (int)thread.Context.Fpcr);
+            memory.WriteInt32((long)address + 0x314, (int)thread.Context.Fpsr);
+            memory.WriteInt64((long)address + 0x318, thread.Context.Tpidr);
 
             return KernelResult.Success;
         }
+
+        private static int GetPsr(IExecutionContext context) // Packs the N, Z, C and V PState flags of the context into one int.
+        {
+            return (context.GetPstateFlag(PState.NFlag) ? (1 << 31) : 0) | // N -> bit 31. NOTE(review): result is negative when N is set, so a (ulong) cast by the caller sign-extends - confirm intended.
+                   (context.GetPstateFlag(PState.ZFlag) ? (1 << 30) : 0) | // Z -> bit 30.
+                   (context.GetPstateFlag(PState.CFlag) ? (1 << 29) : 0) | // C -> bit 29.
+                   (context.GetPstateFlag(PState.VFlag) ? (1 << 28) : 0); // V -> bit 28; all other bits are left at zero.
+        }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs b/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs
index 42eed26a0..0b9511348 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/HleScheduler.cs
@@ -36,12 +36,12 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
                     {
                         KCoreContext coreContext = CoreContexts[core];
 
-                        if (coreContext.ContextSwitchNeeded && (coreContext.CurrentThread?.Context.IsCurrentThread() ?? false))
+                        if (coreContext.ContextSwitchNeeded && (coreContext.CurrentThread?.IsCurrentHostThread() ?? false))
                         {
                             coreContext.ContextSwitch();
                         }
 
-                        if (coreContext.CurrentThread?.Context.IsCurrentThread() ?? false)
+                        if (coreContext.CurrentThread?.IsCurrentHostThread() ?? false)
                         {
                             selectedCount++;
                         }
@@ -70,14 +70,14 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
                     {
                         // If this is not the thread that is currently executing, we need
                         // to request an interrupt to allow safely starting another thread.
-                        if (!currentThread.Context.IsCurrentThread())
+                        if (!currentThread.IsCurrentHostThread())
                         {
                             currentThread.Context.RequestInterrupt();
 
                             return;
                         }
 
-                        CoreManager.Reset(currentThread.Context.Work);
+                        CoreManager.Reset(currentThread.HostThread);
                     }
 
                     // Advance current core and try picking a thread,
@@ -92,9 +92,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
 
                         if (coreContext.CurrentThread != null)
                         {
-                            CoreManager.Set(coreContext.CurrentThread.Context.Work);
+                            CoreManager.Set(coreContext.CurrentThread.HostThread);
 
-                            coreContext.CurrentThread.Context.Execute();
+                            coreContext.CurrentThread.Execute();
 
                             break;
                         }
@@ -134,14 +134,14 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
 
         public void ExitThread(KThread thread)
         {
-            thread.Context.StopExecution();
+            thread.Context.Running = false; // Clears the Running flag on the thread's execution context; presumably this makes guest execution stop - confirm in IExecutionContext.
 
-            CoreManager.Exit(thread.Context.Work);
+            CoreManager.Exit(thread.HostThread); // CoreManager is given the KThread's host Thread object.
         }
 
         public void RemoveThread(KThread thread)
         {
-            CoreManager.RemoveThread(thread.Context.Work);
+            CoreManager.RemoveThread(thread.HostThread); // Forwards the removal to CoreManager, identified by the KThread's host Thread object.
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs
index 979071772..0aa12b0dd 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/KCoreContext.cs
@@ -58,7 +58,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
 
             if (CurrentThread != null)
             {
-                _coreManager.Reset(CurrentThread.Context.Work);
+                _coreManager.Reset(CurrentThread.HostThread);
             }
 
             CurrentThread = SelectedThread;
@@ -70,9 +70,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
                 CurrentThread.TotalTimeRunning += currentTime - CurrentThread.LastScheduledTime;
                 CurrentThread.LastScheduledTime = currentTime;
 
-                _coreManager.Set(CurrentThread.Context.Work);
+                _coreManager.Set(CurrentThread.HostThread);
 
-                CurrentThread.Context.Execute();
+                CurrentThread.Execute();
             }
         }
     }
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KCriticalSection.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KCriticalSection.cs
index 39c857b5d..b7013bb7b 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/KCriticalSection.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/KCriticalSection.cs
@@ -1,4 +1,4 @@
-using ChocolArm64;
+using ARMeilleure;
 using System.Threading;
 
 namespace Ryujinx.HLE.HOS.Kernel.Threading
@@ -53,14 +53,14 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
 
                             if (coreContext.ContextSwitchNeeded)
                             {
-                                CpuThread currentHleThread = coreContext.CurrentThread?.Context;
+                                KThread currentThread = coreContext.CurrentThread;
 
-                                if (currentHleThread == null)
+                                if (currentThread == null)
                                 {
                                     // Nothing is running, we can perform the context switch immediately.
                                     coreContext.ContextSwitch();
                                 }
-                                else if (currentHleThread.IsCurrentThread())
+                                else if (currentThread.IsCurrentHostThread())
                                 {
                                     // Thread running on the current core, context switch will block.
                                     doContextSwitch = true;
@@ -68,7 +68,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
                                 else
                                 {
                                     // Thread running on another core, request a interrupt.
-                                    currentHleThread.RequestInterrupt();
+                                    currentThread.Context.RequestInterrupt();
                                 }
                             }
                         }
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KScheduler.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KScheduler.cs
index 8d2cdfce6..dd5422b8e 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/KScheduler.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/KScheduler.cs
@@ -203,7 +203,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
             {
                 for (int core = 0; core < CpuCoresCount; core++)
                 {
-                    if (CoreContexts[core].CurrentThread?.Context.IsCurrentThread() ?? false)
+                    if (CoreContexts[core].CurrentThread?.IsCurrentHostThread() ?? false)
                     {
                         return CoreContexts[core].CurrentThread;
                     }
diff --git a/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs b/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
index 50c71ea91..54d5d06c8 100644
--- a/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
+++ b/Ryujinx.HLE/HOS/Kernel/Threading/KThread.cs
@@ -1,5 +1,5 @@
-using ChocolArm64;
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
 using Ryujinx.Common.Logging;
 using Ryujinx.HLE.HOS.Kernel.Common;
 using Ryujinx.HLE.HOS.Kernel.Process;
@@ -7,12 +7,17 @@ using System;
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
+using System.Threading;
 
 namespace Ryujinx.HLE.HOS.Kernel.Threading
 {
     class KThread : KSynchronizationObject, IKFutureSchedulerObject
     {
-        public CpuThread Context { get; private set; }
+        private int _hostThreadRunning;
+
+        public Thread HostThread { get; private set; }
+
+        public IExecutionContext Context { get; private set; }
 
         public long AffinityMask { get; set; }
 
@@ -152,30 +157,35 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
                 is64Bits = true;
             }
 
-            Context = new CpuThread(owner.Translator, owner.CpuMemory, (long)entrypoint);
+            HostThread = new Thread(() => ThreadStart(entrypoint));
 
-            bool isAarch32 = (Owner.MmuFlags & 1) == 0;
-
-            Context.ThreadState.Aarch32 = isAarch32;
-
-            Context.ThreadState.X0  = argsPtr;
-
-            if (isAarch32)
+            if (System.UseLegacyJit)
             {
-                Context.ThreadState.X13 = (uint)stackTop;
+                Context = new ChocolArm64.State.CpuThreadState();
             }
             else
             {
-                Context.ThreadState.X31 = stackTop;
+                Context = new ARMeilleure.State.ExecutionContext();
             }
 
-            Context.ThreadState.CntfrqEl0 = 19200000;
-            Context.ThreadState.Tpidr     = (long)_tlsAddress;
+            bool isAarch32 = (Owner.MmuFlags & 1) == 0;
+
+            Context.SetX(0, argsPtr);
+
+            if (isAarch32)
+            {
+                Context.SetX(13, (uint)stackTop);
+            }
+            else
+            {
+                Context.SetX(31, stackTop);
+            }
+
+            Context.CntfrqEl0 = 19200000;
+            Context.Tpidr     = (long)_tlsAddress;
 
             owner.SubscribeThreadEventHandlers(Context);
 
-            Context.WorkFinished += ThreadFinishedHandler;
-
             ThreadUid = System.GetThreadUid();
 
             _hasBeenInitialized = true;
@@ -1002,8 +1012,8 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
 
         public void SetEntryArguments(long argsPtr, int threadHandle)
         {
-            Context.ThreadState.X0 = (ulong)argsPtr;
-            Context.ThreadState.X1 = (ulong)threadHandle;
+            Context.SetX(0, (ulong)argsPtr); // Register X0 receives argsPtr.
+            Context.SetX(1, (ulong)threadHandle); // Register X1 receives threadHandle.
         }
 
         public void TimeUp()
@@ -1013,7 +1023,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
 
         public string GetGuestStackTrace()
         {
-            return Owner.Debugger.GetGuestStackTrace(Context.ThreadState);
+            return Owner.Debugger.GetGuestStackTrace(Context); // Delegates to Owner.Debugger, passing this thread's execution context.
         }
 
         public void PrintGuestStackTrace()
@@ -1026,12 +1036,32 @@ namespace Ryujinx.HLE.HOS.Kernel.Threading
             Logger.PrintInfo(LogClass.Cpu, trace.ToString());
         }
 
-        private void ThreadFinishedHandler(object sender, EventArgs e)
+        public void Execute() // Starts HostThread the first time it is called; calls made once the flag is set do nothing.
+        {
+            if (Interlocked.CompareExchange(ref _hostThreadRunning, 1, 0) == 0) // Atomically changes the flag from 0 to 1; only the caller that observed 0 enters.
+            {
+                HostThread.Start();
+            }
+        }
+
+        private void ThreadStart(ulong entrypoint) // Runs Owner.Translator.Execute for this thread's context starting at entrypoint, then calls ThreadExit.
+        {
+            Owner.Translator.Execute(Context, entrypoint);
+
+            ThreadExit(); // Reached only after Execute returns normally; skipped if Execute throws.
+        }
+
+        private void ThreadExit() // Calls the scheduler's ExitThread and then RemoveThread for this thread.
         {
             System.Scheduler.ExitThread(this);
             System.Scheduler.RemoveThread(this);
         }
 
+        public bool IsCurrentHostThread() // Returns true when the calling managed thread is this KThread's HostThread.
+        {
+            return Thread.CurrentThread == HostThread;
+        }
+
         public override bool IsSignaled()
         {
             return _hasExited;
diff --git a/Ryujinx.HLE/HOS/ProgramLoader.cs b/Ryujinx.HLE/HOS/ProgramLoader.cs
index af974e18f..0bc6447e5 100644
--- a/Ryujinx.HLE/HOS/ProgramLoader.cs
+++ b/Ryujinx.HLE/HOS/ProgramLoader.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Common;
 using Ryujinx.Common.Logging;
 using Ryujinx.HLE.HOS.Kernel.Common;
diff --git a/Ryujinx.HLE/HOS/ServiceCtx.cs b/Ryujinx.HLE/HOS/ServiceCtx.cs
index 99b2d5afe..df74ba0a8 100644
--- a/Ryujinx.HLE/HOS/ServiceCtx.cs
+++ b/Ryujinx.HLE/HOS/ServiceCtx.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.HLE.HOS.Ipc;
 using Ryujinx.HLE.HOS.Kernel.Ipc;
 using Ryujinx.HLE.HOS.Kernel.Process;
@@ -11,7 +11,7 @@ namespace Ryujinx.HLE.HOS
     {
         public Switch         Device       { get; }
         public KProcess       Process      { get; }
-        public MemoryManager  Memory       { get; }
+        public IMemoryManager Memory       { get; }
         public KThread        Thread       { get; }
         public KClientSession Session      { get; }
         public IpcMessage     Request      { get; }
@@ -22,7 +22,7 @@ namespace Ryujinx.HLE.HOS
         public ServiceCtx(
             Switch         device,
             KProcess       process,
-            MemoryManager  memory,
+            IMemoryManager memory,
             KThread        thread,
             KClientSession session,
             IpcMessage     request,
diff --git a/Ryujinx.HLE/HOS/Services/Acc/IProfile.cs b/Ryujinx.HLE/HOS/Services/Acc/IProfile.cs
index 050e44971..10210afed 100644
--- a/Ryujinx.HLE/HOS/Services/Acc/IProfile.cs
+++ b/Ryujinx.HLE/HOS/Services/Acc/IProfile.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Common.Logging;
 using Ryujinx.HLE.HOS.SystemState;
 using Ryujinx.HLE.Utilities;
diff --git a/Ryujinx.HLE/HOS/Services/Aud/AudioOut/IAudioOut.cs b/Ryujinx.HLE/HOS/Services/Aud/AudioOut/IAudioOut.cs
index 4191dfd67..751d3f704 100644
--- a/Ryujinx.HLE/HOS/Services/Aud/AudioOut/IAudioOut.cs
+++ b/Ryujinx.HLE/HOS/Services/Aud/AudioOut/IAudioOut.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Audio;
 using Ryujinx.HLE.HOS.Ipc;
 using Ryujinx.HLE.HOS.Kernel.Common;
diff --git a/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs b/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs
index 599f3d81f..e8baf8192 100644
--- a/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs
+++ b/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/IAudioRenderer.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Audio;
 using Ryujinx.Audio.Adpcm;
 using Ryujinx.Common.Logging;
@@ -24,7 +24,7 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer
 
         private KEvent _updateEvent;
 
-        private MemoryManager _memory;
+        private IMemoryManager _memory;
 
         private IAalOutput _audioOut;
 
@@ -40,7 +40,7 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer
 
         public IAudioRenderer(
             Horizon                system,
-            MemoryManager          memory,
+            IMemoryManager         memory,
             IAalOutput             audioOut,
             AudioRendererParameter Params)
         {
diff --git a/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/VoiceContext.cs b/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/VoiceContext.cs
index 93a16a617..aaff20a5b 100644
--- a/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/VoiceContext.cs
+++ b/Ryujinx.HLE/HOS/Services/Aud/AudioRenderer/VoiceContext.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Audio.Adpcm;
 using System;
 
@@ -65,7 +65,7 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer
             _outStatus.VoiceDropsCount        = 0;
         }
 
-        public int[] GetBufferData(MemoryManager memory, int maxSamples, out int samplesCount)
+        public int[] GetBufferData(IMemoryManager memory, int maxSamples, out int samplesCount)
         {
             if (!Playing)
             {
@@ -122,7 +122,7 @@ namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer
             return output;
         }
 
-        private void UpdateBuffer(MemoryManager memory)
+        private void UpdateBuffer(IMemoryManager memory)
         {
             // TODO: Implement conversion for formats other
             // than interleaved stereo (2 channels).
diff --git a/Ryujinx.HLE/HOS/Services/Aud/IAudioOutManager.cs b/Ryujinx.HLE/HOS/Services/Aud/IAudioOutManager.cs
index ad0dd0445..bea0f3f20 100644
--- a/Ryujinx.HLE/HOS/Services/Aud/IAudioOutManager.cs
+++ b/Ryujinx.HLE/HOS/Services/Aud/IAudioOutManager.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Audio;
 using Ryujinx.Common.Logging;
 using Ryujinx.HLE.HOS.Kernel.Threading;
diff --git a/Ryujinx.HLE/HOS/Services/Ldr/IRoInterface.cs b/Ryujinx.HLE/HOS/Services/Ldr/IRoInterface.cs
index b8780730d..748a600d5 100644
--- a/Ryujinx.HLE/HOS/Services/Ldr/IRoInterface.cs
+++ b/Ryujinx.HLE/HOS/Services/Ldr/IRoInterface.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Common;
 using Ryujinx.HLE.HOS.Ipc;
 using Ryujinx.HLE.HOS.Kernel.Common;
diff --git a/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs b/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs
index 50ab7e01f..261c1c5ae 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Common.Logging;
 using Ryujinx.HLE.HOS.Ipc;
 using Ryujinx.HLE.HOS.Kernel.Common;
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASIoctl.cs
index 3b96ed6bc..47d15a7e5 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASIoctl.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvGpuAS/NvGpuASIoctl.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.Memory;
 using Ryujinx.HLE.HOS.Kernel.Process;
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvGpuGpu/NvGpuGpuIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvGpuGpu/NvGpuGpuIoctl.cs
index 4f276d5d9..04b0c63cd 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvGpuGpu/NvGpuGpuIoctl.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvGpuGpu/NvGpuGpuIoctl.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Common.Logging;
 using System;
 using System.Diagnostics;
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs
index c5f296363..e7879f4a2 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvHostChannel/NvHostChannelIoctl.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.Memory;
 using Ryujinx.HLE.HOS.Kernel.Process;
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs
index 35f1a9491..2a84b677f 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvHostCtrl/NvHostCtrlIoctl.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Common.Logging;
 using Ryujinx.HLE.HOS.Kernel.Process;
 using System;
diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvMap/NvMapIoctl.cs b/Ryujinx.HLE/HOS/Services/Nv/NvMap/NvMapIoctl.cs
index 722866622..d9c579a2a 100644
--- a/Ryujinx.HLE/HOS/Services/Nv/NvMap/NvMapIoctl.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvMap/NvMapIoctl.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.Memory;
 using Ryujinx.HLE.HOS.Kernel.Process;
diff --git a/Ryujinx.HLE/HOS/Services/Time/Clock/StandardSteadyClockCore.cs b/Ryujinx.HLE/HOS/Services/Time/Clock/StandardSteadyClockCore.cs
index fea5bf2f6..5b2d6c84e 100644
--- a/Ryujinx.HLE/HOS/Services/Time/Clock/StandardSteadyClockCore.cs
+++ b/Ryujinx.HLE/HOS/Services/Time/Clock/StandardSteadyClockCore.cs
@@ -40,7 +40,7 @@ namespace Ryujinx.HLE.HOS.Services.Time.Clock
                 ClockSourceId = GetClockSourceId()
             };
 
-            TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(thread.Context.ThreadState.CntpctEl0, thread.Context.ThreadState.CntfrqEl0);
+            TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(thread.Context.CntpctEl0, thread.Context.CntfrqEl0);
 
             result.TimePoint = _setupValue + ticksTimeSpan.ToSeconds();
 
diff --git a/Ryujinx.HLE/HOS/Services/Time/Clock/TickBasedSteadyClockCore.cs b/Ryujinx.HLE/HOS/Services/Time/Clock/TickBasedSteadyClockCore.cs
index 7a69b014b..6cd4c80b4 100644
--- a/Ryujinx.HLE/HOS/Services/Time/Clock/TickBasedSteadyClockCore.cs
+++ b/Ryujinx.HLE/HOS/Services/Time/Clock/TickBasedSteadyClockCore.cs
@@ -30,7 +30,7 @@ namespace Ryujinx.HLE.HOS.Services.Time.Clock
                 ClockSourceId = GetClockSourceId()
             };
 
-            TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(thread.Context.ThreadState.CntpctEl0, thread.Context.ThreadState.CntfrqEl0);
+            TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(thread.Context.CntpctEl0, thread.Context.CntfrqEl0);
 
             result.TimePoint = ticksTimeSpan.ToSeconds();
 
diff --git a/Ryujinx.HLE/HOS/Services/Time/IStaticService.cs b/Ryujinx.HLE/HOS/Services/Time/IStaticService.cs
index 9ee038d58..d9c5b4f25 100644
--- a/Ryujinx.HLE/HOS/Services/Time/IStaticService.cs
+++ b/Ryujinx.HLE/HOS/Services/Time/IStaticService.cs
@@ -141,7 +141,7 @@ namespace Ryujinx.HLE.HOS.Services.Time
 
             if (currentTimePoint.ClockSourceId == otherContext.SteadyTimePoint.ClockSourceId)
             {
-                TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(context.Thread.Context.ThreadState.CntpctEl0, context.Thread.Context.ThreadState.CntfrqEl0);
+                TimeSpanType ticksTimeSpan = TimeSpanType.FromTicks(context.Thread.Context.CntpctEl0, context.Thread.Context.CntfrqEl0);
                 long         baseTimePoint = otherContext.Offset + currentTimePoint.TimePoint - ticksTimeSpan.ToSeconds();
 
                 context.ResponseData.Write(baseTimePoint);
diff --git a/Ryujinx.HLE/HOS/Services/Time/ITimeZoneService.cs b/Ryujinx.HLE/HOS/Services/Time/ITimeZoneService.cs
index 895bb1f3e..b820de38f 100644
--- a/Ryujinx.HLE/HOS/Services/Time/ITimeZoneService.cs
+++ b/Ryujinx.HLE/HOS/Services/Time/ITimeZoneService.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.Common;
 using Ryujinx.Common.Logging;
 using Ryujinx.HLE.HOS.Services.Time.TimeZone;
@@ -106,7 +106,7 @@ namespace Ryujinx.HLE.HOS.Services.Time
             string locationName = Encoding.ASCII.GetString(context.RequestData.ReadBytes(0x24)).TrimEnd('\0');
 
             ResultCode resultCode = TimeZoneManager.Instance.LoadTimeZoneRules(out TimeZoneRule rules, locationName);
-            
+
             // Write TimeZoneRule if success
             if (resultCode == 0)
             {
diff --git a/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs b/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs
index 2f1e68e8f..15db6ff2b 100644
--- a/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs
+++ b/Ryujinx.HLE/HOS/Services/Vi/IApplicationDisplayService.cs
@@ -1,4 +1,4 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using Ryujinx.HLE.HOS.Ipc;
 using Ryujinx.HLE.HOS.Kernel.Common;
 using System;
diff --git a/Ryujinx.HLE/Ryujinx.HLE.csproj b/Ryujinx.HLE/Ryujinx.HLE.csproj
index 78e5c2a3a..3a12a179f 100644
--- a/Ryujinx.HLE/Ryujinx.HLE.csproj
+++ b/Ryujinx.HLE/Ryujinx.HLE.csproj
@@ -2,7 +2,7 @@
 
   <PropertyGroup>
     <TargetFramework>netcoreapp2.1</TargetFramework>
-    <RuntimeIdentifiers>win10-x64;osx-x64;linux-x64</RuntimeIdentifiers>
+    <RuntimeIdentifiers>win-x64;osx-x64;linux-x64</RuntimeIdentifiers>
     <Configurations>Debug;Release;Profile Debug;Profile Release</Configurations>
     <LangVersion>7.1</LangVersion>
   </PropertyGroup>
@@ -38,16 +38,18 @@
   </ItemGroup>
 
   <ItemGroup>
-    <ProjectReference Include="..\ChocolArm64\ChocolArm64.csproj" />
     <ProjectReference Include="..\Ryujinx.Audio\Ryujinx.Audio.csproj" />
     <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
     <ProjectReference Include="..\Ryujinx.Graphics\Ryujinx.Graphics.csproj" />
     <ProjectReference Include="..\Ryujinx.Profiler\Ryujinx.Profiler.csproj" />
+    <ProjectReference Include="..\ARMeilleure\ARMeilleure.csproj" />
+    <ProjectReference Include="..\ChocolArm64\ChocolArm64.csproj" />
   </ItemGroup>
 
   <ItemGroup>
     <PackageReference Include="Concentus" Version="1.1.7" />
     <PackageReference Include="LibHac" Version="0.5.0" />
+    <PackageReference Include="System.Runtime.Intrinsics.Experimental" Version="4.5.0-rc1" />
     <PackageReference Include="TimeZoneConverter.Posix" Version="2.1.0" />
   </ItemGroup>
 
diff --git a/Ryujinx.HLE/Utilities/StructReader.cs b/Ryujinx.HLE/Utilities/StructReader.cs
index 441dfd195..36e5c7d19 100644
--- a/Ryujinx.HLE/Utilities/StructReader.cs
+++ b/Ryujinx.HLE/Utilities/StructReader.cs
@@ -1,15 +1,15 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using System.Runtime.InteropServices;
 
 namespace Ryujinx.HLE.Utilities
 {
     class StructReader
     {
-        private MemoryManager _memory;
+        private IMemoryManager _memory;
 
         public long Position { get; private set; }
 
-        public StructReader(MemoryManager memory, long position)
+        public StructReader(IMemoryManager memory, long position)
         {
             _memory  = memory;
             Position = position;
diff --git a/Ryujinx.HLE/Utilities/StructWriter.cs b/Ryujinx.HLE/Utilities/StructWriter.cs
index 86cfeedd7..c156956db 100644
--- a/Ryujinx.HLE/Utilities/StructWriter.cs
+++ b/Ryujinx.HLE/Utilities/StructWriter.cs
@@ -1,15 +1,15 @@
-using ChocolArm64.Memory;
+using ARMeilleure.Memory;
 using System.Runtime.InteropServices;
 
 namespace Ryujinx.HLE.Utilities
 {
     class StructWriter
     {
-        private MemoryManager _memory;
+        private IMemoryManager _memory;
 
         public long Position { get; private set; }
 
-        public StructWriter(MemoryManager memory, long position)
+        public StructWriter(IMemoryManager memory, long position)
         {
             _memory  = memory;
             Position = position;
diff --git a/Ryujinx.LLE/Luea.csproj b/Ryujinx.LLE/Luea.csproj
index 719a0ef38..895f27eef 100644
--- a/Ryujinx.LLE/Luea.csproj
+++ b/Ryujinx.LLE/Luea.csproj
@@ -2,7 +2,7 @@
 
   <PropertyGroup>
     <TargetFramework>netcoreapp2.1</TargetFramework>
-    <RuntimeIdentifiers>win10-x64;osx-x64;linux-x64</RuntimeIdentifiers>
+    <RuntimeIdentifiers>win-x64;osx-x64;linux-x64</RuntimeIdentifiers>
     <OutputType>Exe</OutputType>
     <Configurations>Debug;Release;Profile Debug;Profile Release</Configurations>
   </PropertyGroup>
diff --git a/Ryujinx.Profiler/Ryujinx.Profiler.csproj b/Ryujinx.Profiler/Ryujinx.Profiler.csproj
index 5a4c8f4f9..bcc2d17d2 100644
--- a/Ryujinx.Profiler/Ryujinx.Profiler.csproj
+++ b/Ryujinx.Profiler/Ryujinx.Profiler.csproj
@@ -2,7 +2,7 @@
 
   <PropertyGroup>
     <TargetFramework>netcoreapp2.1</TargetFramework>
-    <RuntimeIdentifiers>win10-x64;osx-x64;linux-x64</RuntimeIdentifiers>
+    <RuntimeIdentifiers>win-x64;osx-x64;linux-x64</RuntimeIdentifiers>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <Configurations>Debug;Release;Profile Debug;Profile Release</Configurations>
   </PropertyGroup>
diff --git a/Ryujinx.ShaderTools/Ryujinx.ShaderTools.csproj b/Ryujinx.ShaderTools/Ryujinx.ShaderTools.csproj
index 04cab8328..a2ff36d9b 100644
--- a/Ryujinx.ShaderTools/Ryujinx.ShaderTools.csproj
+++ b/Ryujinx.ShaderTools/Ryujinx.ShaderTools.csproj
@@ -2,7 +2,7 @@
 
   <PropertyGroup>
     <TargetFramework>netcoreapp2.1</TargetFramework>
-    <RuntimeIdentifiers>win10-x64;osx-x64;linux-x64</RuntimeIdentifiers>
+    <RuntimeIdentifiers>win-x64;osx-x64;linux-x64</RuntimeIdentifiers>
     <OutputType>Exe</OutputType>
     <Configurations>Debug;Release;Profile Debug;Profile Release</Configurations>
   </PropertyGroup>
diff --git a/Ryujinx.Tests.Unicorn/IndexedProperty.cs b/Ryujinx.Tests.Unicorn/IndexedProperty.cs
index a4365026b..65d445fc0 100644
--- a/Ryujinx.Tests.Unicorn/IndexedProperty.cs
+++ b/Ryujinx.Tests.Unicorn/IndexedProperty.cs
@@ -4,24 +4,31 @@ namespace Ryujinx.Tests.Unicorn
 {
+    /// <summary>
+    /// Exposes a getter/setter delegate pair through indexer syntax.
+    /// </summary>
+    /// <typeparam name="TIndex">Type of the index handed to the delegates.</typeparam>
+    /// <typeparam name="TValue">Type of the value read or written.</typeparam>
     public class IndexedProperty<TIndex, TValue>
     {
-        readonly Action<TIndex, TValue> SetAction;
-        readonly Func<TIndex, TValue> GetFunc;
+        // Only assigned in the constructor; keep the readonly guarantee the old fields had.
+        private readonly Func<TIndex, TValue>   _getFunc;
+        private readonly Action<TIndex, TValue> _setAction;
 
         public IndexedProperty(Func<TIndex, TValue> getFunc, Action<TIndex, TValue> setAction)
         {
-            GetFunc   = getFunc;
-            SetAction = setAction;
+            _getFunc   = getFunc;
+            _setAction = setAction;
         }
 
-        public TValue this[TIndex i]
+        /// <summary>Reads through the getter delegate and writes through the setter delegate.</summary>
+        public TValue this[TIndex index]
         {
             get
             {
-                return GetFunc(i);
+                return _getFunc(index);
             }
             set
             {
-                SetAction(i, value);
+                _setAction(index, value);
             }
         }
     }
diff --git a/Ryujinx.Tests.Unicorn/Native/Interface.cs b/Ryujinx.Tests.Unicorn/Native/Interface.cs
index 006585b5c..59b1da079 100644
--- a/Ryujinx.Tests.Unicorn/Native/Interface.cs
+++ b/Ryujinx.Tests.Unicorn/Native/Interface.cs
@@ -16,11 +16,13 @@ namespace Ryujinx.Tests.Unicorn.Native
         public static void MarshalArrayOf<T>(IntPtr input, int length, out T[] output)
         {
             int size = Marshal.SizeOf(typeof(T));
+
             output = new T[length];
 
             for (int i = 0; i < length; i++)
             {
                 IntPtr item = new IntPtr(input.ToInt64() + i * size);
+
                 output[i] = Marshal.PtrToStructure<T>(item);
             }
         }
@@ -29,7 +31,7 @@ namespace Ryujinx.Tests.Unicorn.Native
         public static extern uint uc_version(out uint major, out uint minor);
 
         [DllImport("unicorn", CallingConvention = CallingConvention.Cdecl)]
-        public static extern UnicornError uc_open(uint arch, uint mode, out IntPtr uc);
+        public static extern UnicornError uc_open(UnicornArch arch, UnicornMode mode, out IntPtr uc);
 
         [DllImport("unicorn", CallingConvention = CallingConvention.Cdecl)]
         public static extern UnicornError uc_close(IntPtr uc);
diff --git a/Ryujinx.Tests.Unicorn/Native/UnicornArch.cs b/Ryujinx.Tests.Unicorn/Native/UnicornArch.cs
index 73710faa8..ff633293e 100644
--- a/Ryujinx.Tests.Unicorn/Native/UnicornArch.cs
+++ b/Ryujinx.Tests.Unicorn/Native/UnicornArch.cs
@@ -1,6 +1,6 @@
 namespace Ryujinx.Tests.Unicorn.Native
 {
-    public enum UnicornArch
+    public enum UnicornArch : uint
     {
         UC_ARCH_ARM = 1,    // ARM architecture (including Thumb, Thumb-2)
         UC_ARCH_ARM64,      // ARM-64, also called AArch64
diff --git a/Ryujinx.Tests.Unicorn/Native/UnicornMode.cs b/Ryujinx.Tests.Unicorn/Native/UnicornMode.cs
index 5cd835169..8045f2dac 100644
--- a/Ryujinx.Tests.Unicorn/Native/UnicornMode.cs
+++ b/Ryujinx.Tests.Unicorn/Native/UnicornMode.cs
@@ -1,7 +1,7 @@
 // ReSharper disable InconsistentNaming
 namespace Ryujinx.Tests.Unicorn.Native
 {
-    public enum UnicornMode
+    public enum UnicornMode : uint
     {
         UC_MODE_LITTLE_ENDIAN = 0,    // little-endian mode (default mode)
         UC_MODE_BIG_ENDIAN = 1 << 30, // big-endian mode
diff --git a/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj b/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj
index 5a99b39f1..d15a405bc 100644
--- a/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj
+++ b/Ryujinx.Tests.Unicorn/Ryujinx.Tests.Unicorn.csproj
@@ -2,7 +2,7 @@
 
   <PropertyGroup>
     <TargetFramework>netcoreapp2.1</TargetFramework>
-    <RuntimeIdentifiers>win10-x64;osx-x64;linux-x64</RuntimeIdentifiers>
+    <RuntimeIdentifiers>win-x64;osx-x64;linux-x64</RuntimeIdentifiers>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <Configurations>Debug;Release;Profile Debug;Profile Release</Configurations>
   </PropertyGroup>
@@ -23,7 +23,6 @@
 
   <ItemGroup>
     <PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.8.0" />
-    <PackageReference Include="System.Runtime.Intrinsics.Experimental" Version="4.5.0-rc1" />
   </ItemGroup>
 
 </Project>
diff --git a/Ryujinx.Tests.Unicorn/SimdValue.cs b/Ryujinx.Tests.Unicorn/SimdValue.cs
new file mode 100644
index 000000000..2b5284305
--- /dev/null
+++ b/Ryujinx.Tests.Unicorn/SimdValue.cs
@@ -0,0 +1,128 @@
+using System;
+
+namespace Ryujinx.Tests.Unicorn
+{
+    /// <summary>
+    /// 128-bit value held as two 64-bit elements, used for the Q vector registers.
+    /// </summary>
+    public struct SimdValue : IEquatable<SimdValue>
+    {
+        // Element 0 and element 1 of the value; never reassigned after construction.
+        private readonly ulong _e0;
+        private readonly ulong _e1;
+
+        /// <summary>Creates a value from its two 64-bit elements.</summary>
+        public SimdValue(ulong e0, ulong e1)
+        {
+            _e0 = e0;
+            _e1 = e1;
+        }
+
+        /// <summary>Creates a value from bytes 0-7 (element 0) and 8-15 (element 1) of <paramref name="data"/>.</summary>
+        public SimdValue(byte[] data)
+        {
+            _e0 = BitConverter.ToUInt64(data, 0);
+            _e1 = BitConverter.ToUInt64(data, 8);
+        }
+
+        /// <summary>Returns 32-bit element 0 reinterpreted as a float.</summary>
+        public float AsFloat()
+        {
+            return GetFloat(0);
+        }
+
+        /// <summary>Returns 64-bit element 0 reinterpreted as a double.</summary>
+        public double AsDouble()
+        {
+            return GetDouble(0);
+        }
+
+        /// <summary>Returns the 32-bit element at <paramref name="index"/> (0-3) reinterpreted as a float.</summary>
+        public float GetFloat(int index)
+        {
+            return BitConverter.Int32BitsToSingle(GetInt32(index));
+        }
+
+        /// <summary>Returns the 64-bit element at <paramref name="index"/> (0-1) reinterpreted as a double.</summary>
+        public double GetDouble(int index)
+        {
+            return BitConverter.Int64BitsToDouble(GetInt64(index));
+        }
+
+        public int  GetInt32(int index) => (int)GetUInt32(index);
+        public long GetInt64(int index) => (long)GetUInt64(index);
+
+        /// <summary>Returns the 32-bit element at <paramref name="index"/> (0-3).</summary>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is not 0-3.</exception>
+        public uint GetUInt32(int index)
+        {
+            switch (index)
+            {
+                case 0: return (uint)(_e0 >> 0);
+                case 1: return (uint)(_e0 >> 32);
+                case 2: return (uint)(_e1 >> 0);
+                case 3: return (uint)(_e1 >> 32);
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(index));
+        }
+
+        /// <summary>Returns the 64-bit element at <paramref name="index"/> (0-1).</summary>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is not 0 or 1.</exception>
+        public ulong GetUInt64(int index)
+        {
+            switch (index)
+            {
+                case 0: return _e0;
+                case 1: return _e1;
+            }
+
+            throw new ArgumentOutOfRangeException(nameof(index));
+        }
+
+        /// <summary>Returns the 16 bytes of the value, element 0 first.</summary>
+        public byte[] ToArray()
+        {
+            byte[] e0Data = BitConverter.GetBytes(_e0);
+            byte[] e1Data = BitConverter.GetBytes(_e1);
+
+            byte[] data = new byte[16];
+
+            Buffer.BlockCopy(e0Data, 0, data, 0, 8);
+            Buffer.BlockCopy(e1Data, 0, data, 8, 8);
+
+            return data;
+        }
+
+        public override int GetHashCode()
+        {
+            return HashCode.Combine(_e0, _e1);
+        }
+
+        public static bool operator ==(SimdValue x, SimdValue y)
+        {
+            return x.Equals(y);
+        }
+
+        public static bool operator !=(SimdValue x, SimdValue y)
+        {
+            return !x.Equals(y);
+        }
+
+        public override bool Equals(object obj)
+        {
+            return obj is SimdValue vector && Equals(vector);
+        }
+
+        public bool Equals(SimdValue other)
+        {
+            return other._e0 == _e0 && other._e1 == _e1;
+        }
+
+        /// <summary>Formats the value as 32 hex digits, element 1 first.</summary>
+        public override string ToString()
+        {
+            return $"0x{_e1:X16}{_e0:X16}";
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Tests.Unicorn/UnicornAArch64.cs b/Ryujinx.Tests.Unicorn/UnicornAArch64.cs
index 0425d1d3a..4453d18d0 100644
--- a/Ryujinx.Tests.Unicorn/UnicornAArch64.cs
+++ b/Ryujinx.Tests.Unicorn/UnicornAArch64.cs
@@ -1,8 +1,5 @@
 using Ryujinx.Tests.Unicorn.Native;
 using System;
-using System.Diagnostics.Contracts;
-using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.X86;
 
 namespace Ryujinx.Tests.Unicorn
 {
@@ -15,95 +12,96 @@ namespace Ryujinx.Tests.Unicorn
             get
             {
                 return new IndexedProperty<int, ulong>(
-                    (int i) => GetX(i),
+                    (int i)              => GetX(i),
                     (int i, ulong value) => SetX(i, value));
             }
         }
 
-        public IndexedProperty<int, Vector128<float>> Q
+        public IndexedProperty<int, SimdValue> Q
         {
             get
             {
-                return new IndexedProperty<int, Vector128<float>>(
-                    (int i) => GetQ(i),
-                    (int i, Vector128<float> value) => SetQ(i, value));
+                return new IndexedProperty<int, SimdValue>(
+                    (int i)                  => GetQ(i),
+                    (int i, SimdValue value) => SetQ(i, value));
             }
         }
 
         public ulong LR
         {
-            get { return GetRegister(ArmRegister.LR); }
-            set { SetRegister(ArmRegister.LR, value); }
+            get => GetRegister(ArmRegister.LR);
+            set => SetRegister(ArmRegister.LR, value);
         }
 
         public ulong SP
         {
-            get { return GetRegister(ArmRegister.SP); }
-            set { SetRegister(ArmRegister.SP, value); }
+            get => GetRegister(ArmRegister.SP);
+            set => SetRegister(ArmRegister.SP, value);
         }
 
         public ulong PC
         {
-            get { return GetRegister(ArmRegister.PC); }
-            set { SetRegister(ArmRegister.PC, value); }
+            get => GetRegister(ArmRegister.PC);
+            set => SetRegister(ArmRegister.PC, value);
         }
 
         public uint Pstate
         {
-            get { return (uint)GetRegister(ArmRegister.PSTATE); }
-            set { SetRegister(ArmRegister.PSTATE, (uint)value); }
+            get => (uint)GetRegister(ArmRegister.PSTATE);
+            set =>       SetRegister(ArmRegister.PSTATE, (uint)value);
         }
 
         public int Fpcr
         {
-            get { return (int)GetRegister(ArmRegister.FPCR); }
-            set { SetRegister(ArmRegister.FPCR, (uint)value); }
+            get => (int)GetRegister(ArmRegister.FPCR);
+            set =>      SetRegister(ArmRegister.FPCR, (uint)value);
         }
 
         public int Fpsr
         {
-            get { return (int)GetRegister(ArmRegister.FPSR); }
-            set { SetRegister(ArmRegister.FPSR, (uint)value); }
+            get => (int)GetRegister(ArmRegister.FPSR);
+            set =>      SetRegister(ArmRegister.FPSR, (uint)value);
         }
 
         public bool OverflowFlag
         {
-            get { return (Pstate & 0x10000000u) != 0; }
-            set { Pstate = (Pstate & ~0x10000000u) | (value ? 0x10000000u : 0u); }
+            get =>          (Pstate &  0x10000000u) != 0;
+            set => Pstate = (Pstate & ~0x10000000u) | (value ? 0x10000000u : 0u);
         }
 
         public bool CarryFlag
         {
-            get { return (Pstate & 0x20000000u) != 0; }
-            set { Pstate = (Pstate & ~0x20000000u) | (value ? 0x20000000u : 0u); }
+            get =>          (Pstate &  0x20000000u) != 0;
+            set => Pstate = (Pstate & ~0x20000000u) | (value ? 0x20000000u : 0u);
         }
 
         public bool ZeroFlag
         {
-            get { return (Pstate & 0x40000000u) != 0; }
-            set { Pstate = (Pstate & ~0x40000000u) | (value ? 0x40000000u : 0u); }
+            get =>          (Pstate &  0x40000000u) != 0;
+            set => Pstate = (Pstate & ~0x40000000u) | (value ? 0x40000000u : 0u);
         }
 
         public bool NegativeFlag
         {
-            get { return (Pstate & 0x80000000u) != 0; }
-            set { Pstate = (Pstate & ~0x80000000u) | (value ? 0x80000000u : 0u); }
+            get =>          (Pstate &  0x80000000u) != 0;
+            set => Pstate = (Pstate & ~0x80000000u) | (value ? 0x80000000u : 0u);
         }
 
         public UnicornAArch64()
         {
-            Interface.Checked(Interface.uc_open((uint)UnicornArch.UC_ARCH_ARM64, (uint)UnicornMode.UC_MODE_LITTLE_ENDIAN, out uc));
+            Interface.Checked(Interface.uc_open(UnicornArch.UC_ARCH_ARM64, UnicornMode.UC_MODE_LITTLE_ENDIAN, out uc));
+
             SetRegister(ArmRegister.CPACR_EL1, 0x00300000);
         }
 
         ~UnicornAArch64()
         {
             Interface.Checked(Interface.uc_close(uc));
         }
 
         public void RunForCount(ulong count)
         {
             Interface.Checked(Interface.uc_emu_start(uc, PC, 0xFFFFFFFFFFFFFFFFu, 0, count));
         }
 
         public void Step()
@@ -111,7 +109,7 @@ namespace Ryujinx.Tests.Unicorn
             RunForCount(1);
         }
 
-        internal static ArmRegister[] X_registers = new ArmRegister[31]
+        private static ArmRegister[] XRegisters = new ArmRegister[31]
         {
             ArmRegister.X0,
             ArmRegister.X1,
@@ -146,7 +144,7 @@ namespace Ryujinx.Tests.Unicorn
             ArmRegister.X30,
         };
 
-        internal static ArmRegister[] Q_registers = new ArmRegister[32]
+        private static ArmRegister[] QRegisters = new ArmRegister[32]
         {
             ArmRegister.Q0,
             ArmRegister.Q1,
@@ -182,97 +180,104 @@ namespace Ryujinx.Tests.Unicorn
             ArmRegister.Q31,
         };
 
-        internal ulong GetRegister(ArmRegister register)
-        {
-            byte[] value_bytes = new byte[8];
-            Interface.Checked(Interface.uc_reg_read(uc, (int)register, value_bytes));
-            return (ulong)BitConverter.ToInt64(value_bytes, 0);
-        }
-
-        internal void SetRegister(ArmRegister register, ulong value)
-        {
-            byte[] value_bytes = BitConverter.GetBytes(value);
-            Interface.Checked(Interface.uc_reg_write(uc, (int)register, value_bytes));
-        }
-
-        internal Vector128<float> GetVector(ArmRegister register)
-        {
-            byte[] value_bytes = new byte[16];
-            Interface.Checked(Interface.uc_reg_read(uc, (int)register, value_bytes));
-            unsafe
-            {
-                fixed (byte* p = &value_bytes[0])
-                {
-                    return Sse.LoadVector128((float*)p);
-                }
-            }
-        }
-
-        internal void SetVector(ArmRegister register, Vector128<float> value)
-        {
-            byte[] value_bytes = new byte[16];
-            unsafe
-            {
-                fixed (byte* p = &value_bytes[0])
-                {
-                    Sse.Store((float*)p, value);
-                }
-            }
-            Interface.Checked(Interface.uc_reg_write(uc, (int)register, value_bytes));
-        }
-
         public ulong GetX(int index)
         {
-            Contract.Requires(index <= 30, "invalid register");
+            if ((uint)index > 30)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
 
-            return GetRegister(X_registers[index]);
+            return GetRegister(XRegisters[index]);
         }
 
         public void SetX(int index, ulong value)
         {
-            Contract.Requires(index <= 30, "invalid register");
+            if ((uint)index > 30)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
 
-            SetRegister(X_registers[index], value);
+            SetRegister(XRegisters[index], value);
         }
 
-        public Vector128<float> GetQ(int index)
+        public SimdValue GetQ(int index)
         {
-            Contract.Requires(index <= 31, "invalid vector");
+            if ((uint)index > 31)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
 
-            return GetVector(Q_registers[index]);
+            return GetVector(QRegisters[index]);
         }
 
-        public void SetQ(int index, Vector128<float> value)
+        public void SetQ(int index, SimdValue value)
         {
-            Contract.Requires(index <= 31, "invalid vector");
+            if ((uint)index > 31)
+            {
+                throw new ArgumentOutOfRangeException(nameof(index));
+            }
 
-            SetVector(Q_registers[index], value);
+            SetVector(QRegisters[index], value);
+        }
+
+        private ulong GetRegister(ArmRegister register)
+        {
+            byte[] data = new byte[8];
+
+            Interface.Checked(Interface.uc_reg_read(uc, (int)register, data));
+
+            return BitConverter.ToUInt64(data, 0);
+        }
+
+        private void SetRegister(ArmRegister register, ulong value)
+        {
+            byte[] data = BitConverter.GetBytes(value);
+
+            Interface.Checked(Interface.uc_reg_write(uc, (int)register, data));
+        }
+
+        private SimdValue GetVector(ArmRegister register)
+        {
+            byte[] data = new byte[16];
+
+            Interface.Checked(Interface.uc_reg_read(uc, (int)register, data));
+
+            return new SimdValue(data);
+        }
+
+        private void SetVector(ArmRegister register, SimdValue value)
+        {
+            byte[] data = value.ToArray();
+
+            Interface.Checked(Interface.uc_reg_write(uc, (int)register, data));
         }
 
         public byte[] MemoryRead(ulong address, ulong size)
         {
             byte[] value = new byte[size];
+
             Interface.Checked(Interface.uc_mem_read(uc, address, value, size));
+
             return value;
         }
 
-        public byte   MemoryRead8 (ulong address) { return MemoryRead(address, 1)[0]; }
-        public UInt16 MemoryRead16(ulong address) { return (UInt16)BitConverter.ToInt16(MemoryRead(address, 2), 0); }
-        public UInt32 MemoryRead32(ulong address) { return (UInt32)BitConverter.ToInt32(MemoryRead(address, 4), 0); }
-        public UInt64 MemoryRead64(ulong address) { return (UInt64)BitConverter.ToInt64(MemoryRead(address, 8), 0); }
+        public byte   MemoryRead8 (ulong address) => MemoryRead(address, 1)[0];
+        public UInt16 MemoryRead16(ulong address) => (UInt16)BitConverter.ToInt16(MemoryRead(address, 2), 0);
+        public UInt32 MemoryRead32(ulong address) => (UInt32)BitConverter.ToInt32(MemoryRead(address, 4), 0);
+        public UInt64 MemoryRead64(ulong address) => (UInt64)BitConverter.ToInt64(MemoryRead(address, 8), 0);
 
         public void MemoryWrite(ulong address, byte[] value)
         {
             Interface.Checked(Interface.uc_mem_write(uc, address, value, (ulong)value.Length));
         }
 
-        public void MemoryWrite8 (ulong address, byte value)   { MemoryWrite(address, new byte[]{value}); }
-        public void MemoryWrite16(ulong address, Int16 value)  { MemoryWrite(address, BitConverter.GetBytes(value)); }
-        public void MemoryWrite16(ulong address, UInt16 value) { MemoryWrite(address, BitConverter.GetBytes(value)); }
-        public void MemoryWrite32(ulong address, Int32 value)  { MemoryWrite(address, BitConverter.GetBytes(value)); }
-        public void MemoryWrite32(ulong address, UInt32 value) { MemoryWrite(address, BitConverter.GetBytes(value)); }
-        public void MemoryWrite64(ulong address, Int64 value)  { MemoryWrite(address, BitConverter.GetBytes(value)); }
-        public void MemoryWrite64(ulong address, UInt64 value) { MemoryWrite(address, BitConverter.GetBytes(value)); }
+        public void MemoryWrite8 (ulong address, byte value)   => MemoryWrite(address, new byte[]{value});
+        public void MemoryWrite16(ulong address, Int16 value)  => MemoryWrite(address, BitConverter.GetBytes(value));
+        public void MemoryWrite16(ulong address, UInt16 value) => MemoryWrite(address, BitConverter.GetBytes(value));
+        public void MemoryWrite32(ulong address, Int32 value)  => MemoryWrite(address, BitConverter.GetBytes(value));
+        public void MemoryWrite32(ulong address, UInt32 value) => MemoryWrite(address, BitConverter.GetBytes(value));
+        public void MemoryWrite64(ulong address, Int64 value)  => MemoryWrite(address, BitConverter.GetBytes(value));
+        public void MemoryWrite64(ulong address, UInt64 value) => MemoryWrite(address, BitConverter.GetBytes(value));
 
         public void MemoryMap(ulong address, ulong size, MemoryPermission permissions)
         {
@@ -289,21 +294,12 @@ namespace Ryujinx.Tests.Unicorn
             Interface.Checked(Interface.uc_mem_protect(uc, address, size, (uint)permissions));
         }
 
-        public void DumpMemoryInformation()
-        {
-            Interface.Checked(Interface.uc_mem_regions(uc, out IntPtr regions_raw, out uint length));
-            Interface.MarshalArrayOf<UnicornMemoryRegion>(regions_raw, (int)length, out var regions);
-            foreach (var region in regions)
-            {
-                Console.WriteLine("region: begin {0:X16} end {1:X16} perms {2:X8}", region.begin, region.end, region.perms);
-            }
-        }
-
         public static bool IsAvailable()
         {
             try
             {
-                Interface.uc_version(out uint major, out uint minor);
+                Interface.uc_version(out _, out _);
+
                 return true;
             }
             catch (DllNotFoundException)
diff --git a/Ryujinx.Tests/Cpu/CpuTest.cs b/Ryujinx.Tests/Cpu/CpuTest.cs
index b147cf446..1e7b75c61 100644
--- a/Ryujinx.Tests/Cpu/CpuTest.cs
+++ b/Ryujinx.Tests/Cpu/CpuTest.cs
@@ -1,7 +1,6 @@
-using ChocolArm64;
-using ChocolArm64.Memory;
-using ChocolArm64.State;
-using ChocolArm64.Translation;
+using ARMeilleure.Memory;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
 
 using NUnit.Framework;
 
@@ -9,24 +8,24 @@ using Ryujinx.Tests.Unicorn;
 
 using System;
 using System.Runtime.InteropServices;
-using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.X86;
-using System.Threading;
 
 namespace Ryujinx.Tests.Cpu
 {
     [TestFixture]
     public class CpuTest
     {
-        protected long Position { get; private set; }
-        private long _size;
+        private ulong _currAddress;
+        private long  _size;
 
-        private long _entryPoint;
+        private ulong _entryPoint;
 
         private IntPtr _ramPointer;
 
         private MemoryManager _memory;
-        private CpuThread     _thread;
+
+        private ExecutionContext _context;
+
+        private Translator _translator;
 
         private static bool _unicornAvailable;
         private UnicornAArch64 _unicornEmu;
@@ -44,24 +43,24 @@ namespace Ryujinx.Tests.Cpu
         [SetUp]
         public void Setup()
         {
-            Position = 0x1000;
-            _size    = 0x1000;
+            _currAddress = 0x1000;
+            _size        = 0x1000;
 
-            _entryPoint = Position;
+            _entryPoint = _currAddress;
 
             _ramPointer = Marshal.AllocHGlobal(new IntPtr(_size));
             _memory = new MemoryManager(_ramPointer);
-            _memory.Map(Position, 0, _size);
+            _memory.Map((long)_currAddress, 0, _size);
 
-            Translator translator = new Translator(_memory);
+            _context = new ExecutionContext();
 
-            _thread = new CpuThread(translator, _memory, _entryPoint);
+            _translator = new Translator(_memory);
 
             if (_unicornAvailable)
             {
                 _unicornEmu = new UnicornAArch64();
-                _unicornEmu.MemoryMap((ulong)Position, (ulong)_size, MemoryPermission.READ | MemoryPermission.EXEC);
-                _unicornEmu.PC = (ulong)_entryPoint;
+                _unicornEmu.MemoryMap(_currAddress, (ulong)_size, MemoryPermission.READ | MemoryPermission.EXEC);
+                _unicornEmu.PC = _entryPoint;
             }
         }
 
@@ -70,7 +69,8 @@ namespace Ryujinx.Tests.Cpu
         {
             Marshal.FreeHGlobal(_ramPointer);
             _memory     = null;
-            _thread     = null;
+            _context    = null;
+            _translator = null;
             _unicornEmu = null;
         }
 
@@ -82,51 +82,61 @@ namespace Ryujinx.Tests.Cpu
 
         protected void Opcode(uint opcode)
         {
-            _thread.Memory.WriteUInt32(Position, opcode);
+            _memory.WriteUInt32((long)_currAddress, opcode); // Append the opcode to the test program (address passed as long).
 
             if (_unicornAvailable)
             {
-                _unicornEmu.MemoryWrite32((ulong)Position, opcode);
+                _unicornEmu.MemoryWrite32(_currAddress, opcode); // _currAddress is already ulong, no cast required.
             }
 
-            Position += 4;
+            _currAddress += 4; // Advance past the 32-bit opcode just written.
         }
 
-        protected void SetThreadState(ulong x0 = 0, ulong x1 = 0, ulong x2 = 0, ulong x3 = 0, ulong x31 = 0,
-                                      Vector128<float> v0  = default(Vector128<float>),
-                                      Vector128<float> v1  = default(Vector128<float>),
-                                      Vector128<float> v2  = default(Vector128<float>),
-                                      Vector128<float> v3  = default(Vector128<float>),
-                                      Vector128<float> v4  = default(Vector128<float>),
-                                      Vector128<float> v5  = default(Vector128<float>),
-                                      Vector128<float> v30 = default(Vector128<float>),
-                                      Vector128<float> v31 = default(Vector128<float>),
-                                      bool overflow = false, bool carry = false, bool zero = false, bool negative = false,
-                                      int fpcr = 0x0, int fpsr = 0x0)
+        protected ExecutionContext GetContext() => _context; // The context created in Setup() and passed to the translator by ExecuteOpcodes().
+
+        protected void SetContext(ulong x0       = 0,
+                                  ulong x1       = 0,
+                                  ulong x2       = 0,
+                                  ulong x3       = 0,
+                                  ulong x31      = 0,
+                                  V128  v0       = default(V128),
+                                  V128  v1       = default(V128),
+                                  V128  v2       = default(V128),
+                                  V128  v3       = default(V128),
+                                  V128  v4       = default(V128),
+                                  V128  v5       = default(V128),
+                                  V128  v30      = default(V128),
+                                  V128  v31      = default(V128),
+                                  bool  overflow = false,
+                                  bool  carry    = false,
+                                  bool  zero     = false,
+                                  bool  negative = false,
+                                  int   fpcr     = 0,
+                                  int   fpsr     = 0)
         {
-            _thread.ThreadState.X0 = x0;
-            _thread.ThreadState.X1 = x1;
-            _thread.ThreadState.X2 = x2;
-            _thread.ThreadState.X3 = x3;
+            _context.SetX(0, x0);
+            _context.SetX(1, x1);
+            _context.SetX(2, x2);
+            _context.SetX(3, x3);
 
-            _thread.ThreadState.X31 = x31;
+            _context.SetX(31, x31);
 
-            _thread.ThreadState.V0  = v0;
-            _thread.ThreadState.V1  = v1;
-            _thread.ThreadState.V2  = v2;
-            _thread.ThreadState.V3  = v3;
-            _thread.ThreadState.V4  = v4;
-            _thread.ThreadState.V5  = v5;
-            _thread.ThreadState.V30 = v30;
-            _thread.ThreadState.V31 = v31;
+            _context.SetV(0,  v0);
+            _context.SetV(1,  v1);
+            _context.SetV(2,  v2);
+            _context.SetV(3,  v3);
+            _context.SetV(4,  v4);
+            _context.SetV(5,  v5);
+            _context.SetV(30, v30);
+            _context.SetV(31, v31);
 
-            _thread.ThreadState.Overflow = overflow;
-            _thread.ThreadState.Carry    = carry;
-            _thread.ThreadState.Zero     = zero;
-            _thread.ThreadState.Negative = negative;
+            _context.SetPstateFlag(PState.VFlag, overflow);
+            _context.SetPstateFlag(PState.CFlag, carry);
+            _context.SetPstateFlag(PState.ZFlag, zero);
+            _context.SetPstateFlag(PState.NFlag, negative);
 
-            _thread.ThreadState.Fpcr = fpcr;
-            _thread.ThreadState.Fpsr = fpsr;
+            _context.Fpcr = (FPCR)fpcr;
+            _context.Fpsr = (FPSR)fpsr;
 
             if (_unicornAvailable)
             {
@@ -137,14 +147,14 @@ namespace Ryujinx.Tests.Cpu
 
                 _unicornEmu.SP = x31;
 
-                _unicornEmu.Q[0]  = v0;
-                _unicornEmu.Q[1]  = v1;
-                _unicornEmu.Q[2]  = v2;
-                _unicornEmu.Q[3]  = v3;
-                _unicornEmu.Q[4]  = v4;
-                _unicornEmu.Q[5]  = v5;
-                _unicornEmu.Q[30] = v30;
-                _unicornEmu.Q[31] = v31;
+                _unicornEmu.Q[0]  = V128ToSimdValue(v0);
+                _unicornEmu.Q[1]  = V128ToSimdValue(v1);
+                _unicornEmu.Q[2]  = V128ToSimdValue(v2);
+                _unicornEmu.Q[3]  = V128ToSimdValue(v3);
+                _unicornEmu.Q[4]  = V128ToSimdValue(v4);
+                _unicornEmu.Q[5]  = V128ToSimdValue(v5);
+                _unicornEmu.Q[30] = V128ToSimdValue(v30);
+                _unicornEmu.Q[31] = V128ToSimdValue(v31);
 
                 _unicornEmu.OverflowFlag = overflow;
                 _unicornEmu.CarryFlag    = carry;
@@ -158,43 +168,41 @@ namespace Ryujinx.Tests.Cpu
 
         protected void ExecuteOpcodes()
         {
-            using (ManualResetEvent wait = new ManualResetEvent(false))
-            {
-                _thread.ThreadState.Break += (sender, e) => _thread.StopExecution();
-                _thread.WorkFinished += (sender, e) => wait.Set();
-
-                _thread.Execute();
-                wait.WaitOne();
-            }
+            _translator.Execute(_context, _entryPoint); // Direct call; replaces the removed thread + wait-handle sequence.
 
             if (_unicornAvailable)
             {
-                _unicornEmu.RunForCount((ulong)(Position - _entryPoint - 8) / 4);
+                _unicornEmu.RunForCount((_currAddress - _entryPoint - 4) / 4); // Count excludes the last opcode written (the RET appended by SingleOpcode); operands are already ulong.
             }
         }
 
-        protected CpuThreadState GetThreadState() => _thread.ThreadState;
-
-        protected CpuThreadState SingleOpcode(uint opcode,
-                                              ulong x0 = 0, ulong x1 = 0, ulong x2 = 0, ulong x3 = 0, ulong x31 = 0,
-                                              Vector128<float> v0  = default(Vector128<float>),
-                                              Vector128<float> v1  = default(Vector128<float>),
-                                              Vector128<float> v2  = default(Vector128<float>),
-                                              Vector128<float> v3  = default(Vector128<float>),
-                                              Vector128<float> v4  = default(Vector128<float>),
-                                              Vector128<float> v5  = default(Vector128<float>),
-                                              Vector128<float> v30 = default(Vector128<float>),
-                                              Vector128<float> v31 = default(Vector128<float>),
-                                              bool overflow = false, bool carry = false, bool zero = false, bool negative = false,
-                                              int fpcr = 0x0, int fpsr = 0x0)
+        protected ExecutionContext SingleOpcode(uint  opcode, // Writes opcode + RET, applies the given state, executes, and returns the context.
+                                                ulong x0       = 0,
+                                                ulong x1       = 0,
+                                                ulong x2       = 0,
+                                                ulong x3       = 0,
+                                                ulong x31      = 0, // Forwarded to SetContext, which also uses it as Unicorn's SP.
+                                                V128  v0       = default(V128),
+                                                V128  v1       = default(V128),
+                                                V128  v2       = default(V128),
+                                                V128  v3       = default(V128),
+                                                V128  v4       = default(V128),
+                                                V128  v5       = default(V128),
+                                                V128  v30      = default(V128),
+                                                V128  v31      = default(V128),
+                                                bool  overflow = false,
+                                                bool  carry    = false,
+                                                bool  zero     = false,
+                                                bool  negative = false,
+                                                int   fpcr     = 0,
+                                                int   fpsr     = 0)
         {
             Opcode(opcode);
-            Opcode(0xD4200000); // BRK #0
             Opcode(0xD65F03C0); // RET
-            SetThreadState(x0, x1, x2, x3, x31, v0, v1, v2, v3, v4, v5, v30, v31, overflow, carry, zero, negative, fpcr, fpsr);
+            SetContext(x0, x1, x2, x3, x31, v0, v1, v2, v3, v4, v5, v30, v31, overflow, carry, zero, negative, fpcr, fpsr); // Seeds _context and, when available, Unicorn.
             ExecuteOpcodes();
 
-            return GetThreadState();
+            return GetContext(); // State after execution, for the caller's own assertions.
         }
 
         /// <summary>Rounding Mode control field.</summary>
@@ -279,101 +287,101 @@ namespace Ryujinx.Tests.Cpu
                 ManageFpSkips(fpSkips);
             }
 
-            Assert.That(_thread.ThreadState.X0,  Is.EqualTo(_unicornEmu.X[0]));
-            Assert.That(_thread.ThreadState.X1,  Is.EqualTo(_unicornEmu.X[1]));
-            Assert.That(_thread.ThreadState.X2,  Is.EqualTo(_unicornEmu.X[2]));
-            Assert.That(_thread.ThreadState.X3,  Is.EqualTo(_unicornEmu.X[3]));
-            Assert.That(_thread.ThreadState.X4,  Is.EqualTo(_unicornEmu.X[4]));
-            Assert.That(_thread.ThreadState.X5,  Is.EqualTo(_unicornEmu.X[5]));
-            Assert.That(_thread.ThreadState.X6,  Is.EqualTo(_unicornEmu.X[6]));
-            Assert.That(_thread.ThreadState.X7,  Is.EqualTo(_unicornEmu.X[7]));
-            Assert.That(_thread.ThreadState.X8,  Is.EqualTo(_unicornEmu.X[8]));
-            Assert.That(_thread.ThreadState.X9,  Is.EqualTo(_unicornEmu.X[9]));
-            Assert.That(_thread.ThreadState.X10, Is.EqualTo(_unicornEmu.X[10]));
-            Assert.That(_thread.ThreadState.X11, Is.EqualTo(_unicornEmu.X[11]));
-            Assert.That(_thread.ThreadState.X12, Is.EqualTo(_unicornEmu.X[12]));
-            Assert.That(_thread.ThreadState.X13, Is.EqualTo(_unicornEmu.X[13]));
-            Assert.That(_thread.ThreadState.X14, Is.EqualTo(_unicornEmu.X[14]));
-            Assert.That(_thread.ThreadState.X15, Is.EqualTo(_unicornEmu.X[15]));
-            Assert.That(_thread.ThreadState.X16, Is.EqualTo(_unicornEmu.X[16]));
-            Assert.That(_thread.ThreadState.X17, Is.EqualTo(_unicornEmu.X[17]));
-            Assert.That(_thread.ThreadState.X18, Is.EqualTo(_unicornEmu.X[18]));
-            Assert.That(_thread.ThreadState.X19, Is.EqualTo(_unicornEmu.X[19]));
-            Assert.That(_thread.ThreadState.X20, Is.EqualTo(_unicornEmu.X[20]));
-            Assert.That(_thread.ThreadState.X21, Is.EqualTo(_unicornEmu.X[21]));
-            Assert.That(_thread.ThreadState.X22, Is.EqualTo(_unicornEmu.X[22]));
-            Assert.That(_thread.ThreadState.X23, Is.EqualTo(_unicornEmu.X[23]));
-            Assert.That(_thread.ThreadState.X24, Is.EqualTo(_unicornEmu.X[24]));
-            Assert.That(_thread.ThreadState.X25, Is.EqualTo(_unicornEmu.X[25]));
-            Assert.That(_thread.ThreadState.X26, Is.EqualTo(_unicornEmu.X[26]));
-            Assert.That(_thread.ThreadState.X27, Is.EqualTo(_unicornEmu.X[27]));
-            Assert.That(_thread.ThreadState.X28, Is.EqualTo(_unicornEmu.X[28]));
-            Assert.That(_thread.ThreadState.X29, Is.EqualTo(_unicornEmu.X[29]));
-            Assert.That(_thread.ThreadState.X30, Is.EqualTo(_unicornEmu.X[30]));
+            Assert.That(_context.GetX(0),  Is.EqualTo(_unicornEmu.X[0]));
+            Assert.That(_context.GetX(1),  Is.EqualTo(_unicornEmu.X[1]));
+            Assert.That(_context.GetX(2),  Is.EqualTo(_unicornEmu.X[2]));
+            Assert.That(_context.GetX(3),  Is.EqualTo(_unicornEmu.X[3]));
+            Assert.That(_context.GetX(4),  Is.EqualTo(_unicornEmu.X[4]));
+            Assert.That(_context.GetX(5),  Is.EqualTo(_unicornEmu.X[5]));
+            Assert.That(_context.GetX(6),  Is.EqualTo(_unicornEmu.X[6]));
+            Assert.That(_context.GetX(7),  Is.EqualTo(_unicornEmu.X[7]));
+            Assert.That(_context.GetX(8),  Is.EqualTo(_unicornEmu.X[8]));
+            Assert.That(_context.GetX(9),  Is.EqualTo(_unicornEmu.X[9]));
+            Assert.That(_context.GetX(10), Is.EqualTo(_unicornEmu.X[10]));
+            Assert.That(_context.GetX(11), Is.EqualTo(_unicornEmu.X[11]));
+            Assert.That(_context.GetX(12), Is.EqualTo(_unicornEmu.X[12]));
+            Assert.That(_context.GetX(13), Is.EqualTo(_unicornEmu.X[13]));
+            Assert.That(_context.GetX(14), Is.EqualTo(_unicornEmu.X[14]));
+            Assert.That(_context.GetX(15), Is.EqualTo(_unicornEmu.X[15]));
+            Assert.That(_context.GetX(16), Is.EqualTo(_unicornEmu.X[16]));
+            Assert.That(_context.GetX(17), Is.EqualTo(_unicornEmu.X[17]));
+            Assert.That(_context.GetX(18), Is.EqualTo(_unicornEmu.X[18]));
+            Assert.That(_context.GetX(19), Is.EqualTo(_unicornEmu.X[19]));
+            Assert.That(_context.GetX(20), Is.EqualTo(_unicornEmu.X[20]));
+            Assert.That(_context.GetX(21), Is.EqualTo(_unicornEmu.X[21]));
+            Assert.That(_context.GetX(22), Is.EqualTo(_unicornEmu.X[22]));
+            Assert.That(_context.GetX(23), Is.EqualTo(_unicornEmu.X[23]));
+            Assert.That(_context.GetX(24), Is.EqualTo(_unicornEmu.X[24]));
+            Assert.That(_context.GetX(25), Is.EqualTo(_unicornEmu.X[25]));
+            Assert.That(_context.GetX(26), Is.EqualTo(_unicornEmu.X[26]));
+            Assert.That(_context.GetX(27), Is.EqualTo(_unicornEmu.X[27]));
+            Assert.That(_context.GetX(28), Is.EqualTo(_unicornEmu.X[28]));
+            Assert.That(_context.GetX(29), Is.EqualTo(_unicornEmu.X[29]));
+            Assert.That(_context.GetX(30), Is.EqualTo(_unicornEmu.X[30]));
 
-            Assert.That(_thread.ThreadState.X31, Is.EqualTo(_unicornEmu.SP));
+            Assert.That(_context.GetX(31), Is.EqualTo(_unicornEmu.SP));
 
             if (fpTolerances == FpTolerances.None)
             {
-                Assert.That(_thread.ThreadState.V0, Is.EqualTo(_unicornEmu.Q[0]));
+                Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0]));
             }
             else
             {
                 ManageFpTolerances(fpTolerances);
             }
-            Assert.That(_thread.ThreadState.V1,  Is.EqualTo(_unicornEmu.Q[1]));
-            Assert.That(_thread.ThreadState.V2,  Is.EqualTo(_unicornEmu.Q[2]));
-            Assert.That(_thread.ThreadState.V3,  Is.EqualTo(_unicornEmu.Q[3]));
-            Assert.That(_thread.ThreadState.V4,  Is.EqualTo(_unicornEmu.Q[4]));
-            Assert.That(_thread.ThreadState.V5,  Is.EqualTo(_unicornEmu.Q[5]));
-            Assert.That(_thread.ThreadState.V6,  Is.EqualTo(_unicornEmu.Q[6]));
-            Assert.That(_thread.ThreadState.V7,  Is.EqualTo(_unicornEmu.Q[7]));
-            Assert.That(_thread.ThreadState.V8,  Is.EqualTo(_unicornEmu.Q[8]));
-            Assert.That(_thread.ThreadState.V9,  Is.EqualTo(_unicornEmu.Q[9]));
-            Assert.That(_thread.ThreadState.V10, Is.EqualTo(_unicornEmu.Q[10]));
-            Assert.That(_thread.ThreadState.V11, Is.EqualTo(_unicornEmu.Q[11]));
-            Assert.That(_thread.ThreadState.V12, Is.EqualTo(_unicornEmu.Q[12]));
-            Assert.That(_thread.ThreadState.V13, Is.EqualTo(_unicornEmu.Q[13]));
-            Assert.That(_thread.ThreadState.V14, Is.EqualTo(_unicornEmu.Q[14]));
-            Assert.That(_thread.ThreadState.V15, Is.EqualTo(_unicornEmu.Q[15]));
-            Assert.That(_thread.ThreadState.V16, Is.EqualTo(_unicornEmu.Q[16]));
-            Assert.That(_thread.ThreadState.V17, Is.EqualTo(_unicornEmu.Q[17]));
-            Assert.That(_thread.ThreadState.V18, Is.EqualTo(_unicornEmu.Q[18]));
-            Assert.That(_thread.ThreadState.V19, Is.EqualTo(_unicornEmu.Q[19]));
-            Assert.That(_thread.ThreadState.V20, Is.EqualTo(_unicornEmu.Q[20]));
-            Assert.That(_thread.ThreadState.V21, Is.EqualTo(_unicornEmu.Q[21]));
-            Assert.That(_thread.ThreadState.V22, Is.EqualTo(_unicornEmu.Q[22]));
-            Assert.That(_thread.ThreadState.V23, Is.EqualTo(_unicornEmu.Q[23]));
-            Assert.That(_thread.ThreadState.V24, Is.EqualTo(_unicornEmu.Q[24]));
-            Assert.That(_thread.ThreadState.V25, Is.EqualTo(_unicornEmu.Q[25]));
-            Assert.That(_thread.ThreadState.V26, Is.EqualTo(_unicornEmu.Q[26]));
-            Assert.That(_thread.ThreadState.V27, Is.EqualTo(_unicornEmu.Q[27]));
-            Assert.That(_thread.ThreadState.V28, Is.EqualTo(_unicornEmu.Q[28]));
-            Assert.That(_thread.ThreadState.V29, Is.EqualTo(_unicornEmu.Q[29]));
-            Assert.That(_thread.ThreadState.V30, Is.EqualTo(_unicornEmu.Q[30]));
-            Assert.That(_thread.ThreadState.V31, Is.EqualTo(_unicornEmu.Q[31]));
+            Assert.That(V128ToSimdValue(_context.GetV(1)),  Is.EqualTo(_unicornEmu.Q[1]));
+            Assert.That(V128ToSimdValue(_context.GetV(2)),  Is.EqualTo(_unicornEmu.Q[2]));
+            Assert.That(V128ToSimdValue(_context.GetV(3)),  Is.EqualTo(_unicornEmu.Q[3]));
+            Assert.That(V128ToSimdValue(_context.GetV(4)),  Is.EqualTo(_unicornEmu.Q[4]));
+            Assert.That(V128ToSimdValue(_context.GetV(5)),  Is.EqualTo(_unicornEmu.Q[5]));
+            Assert.That(V128ToSimdValue(_context.GetV(6)),  Is.EqualTo(_unicornEmu.Q[6]));
+            Assert.That(V128ToSimdValue(_context.GetV(7)),  Is.EqualTo(_unicornEmu.Q[7]));
+            Assert.That(V128ToSimdValue(_context.GetV(8)),  Is.EqualTo(_unicornEmu.Q[8]));
+            Assert.That(V128ToSimdValue(_context.GetV(9)),  Is.EqualTo(_unicornEmu.Q[9]));
+            Assert.That(V128ToSimdValue(_context.GetV(10)), Is.EqualTo(_unicornEmu.Q[10]));
+            Assert.That(V128ToSimdValue(_context.GetV(11)), Is.EqualTo(_unicornEmu.Q[11]));
+            Assert.That(V128ToSimdValue(_context.GetV(12)), Is.EqualTo(_unicornEmu.Q[12]));
+            Assert.That(V128ToSimdValue(_context.GetV(13)), Is.EqualTo(_unicornEmu.Q[13]));
+            Assert.That(V128ToSimdValue(_context.GetV(14)), Is.EqualTo(_unicornEmu.Q[14]));
+            Assert.That(V128ToSimdValue(_context.GetV(15)), Is.EqualTo(_unicornEmu.Q[15]));
+            Assert.That(V128ToSimdValue(_context.GetV(16)), Is.EqualTo(_unicornEmu.Q[16]));
+            Assert.That(V128ToSimdValue(_context.GetV(17)), Is.EqualTo(_unicornEmu.Q[17]));
+            Assert.That(V128ToSimdValue(_context.GetV(18)), Is.EqualTo(_unicornEmu.Q[18]));
+            Assert.That(V128ToSimdValue(_context.GetV(19)), Is.EqualTo(_unicornEmu.Q[19]));
+            Assert.That(V128ToSimdValue(_context.GetV(20)), Is.EqualTo(_unicornEmu.Q[20]));
+            Assert.That(V128ToSimdValue(_context.GetV(21)), Is.EqualTo(_unicornEmu.Q[21]));
+            Assert.That(V128ToSimdValue(_context.GetV(22)), Is.EqualTo(_unicornEmu.Q[22]));
+            Assert.That(V128ToSimdValue(_context.GetV(23)), Is.EqualTo(_unicornEmu.Q[23]));
+            Assert.That(V128ToSimdValue(_context.GetV(24)), Is.EqualTo(_unicornEmu.Q[24]));
+            Assert.That(V128ToSimdValue(_context.GetV(25)), Is.EqualTo(_unicornEmu.Q[25]));
+            Assert.That(V128ToSimdValue(_context.GetV(26)), Is.EqualTo(_unicornEmu.Q[26]));
+            Assert.That(V128ToSimdValue(_context.GetV(27)), Is.EqualTo(_unicornEmu.Q[27]));
+            Assert.That(V128ToSimdValue(_context.GetV(28)), Is.EqualTo(_unicornEmu.Q[28]));
+            Assert.That(V128ToSimdValue(_context.GetV(29)), Is.EqualTo(_unicornEmu.Q[29]));
+            Assert.That(V128ToSimdValue(_context.GetV(30)), Is.EqualTo(_unicornEmu.Q[30]));
+            Assert.That(V128ToSimdValue(_context.GetV(31)), Is.EqualTo(_unicornEmu.Q[31]));
 
-            Assert.That(_thread.ThreadState.Fpcr,                 Is.EqualTo(_unicornEmu.Fpcr));
-            Assert.That(_thread.ThreadState.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask));
+            Assert.That((int)_context.Fpcr,                 Is.EqualTo(_unicornEmu.Fpcr));
+            Assert.That((int)_context.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask));
 
-            Assert.That(_thread.ThreadState.Overflow, Is.EqualTo(_unicornEmu.OverflowFlag));
-            Assert.That(_thread.ThreadState.Carry,    Is.EqualTo(_unicornEmu.CarryFlag));
-            Assert.That(_thread.ThreadState.Zero,     Is.EqualTo(_unicornEmu.ZeroFlag));
-            Assert.That(_thread.ThreadState.Negative, Is.EqualTo(_unicornEmu.NegativeFlag));
+            Assert.That(_context.GetPstateFlag(PState.VFlag), Is.EqualTo(_unicornEmu.OverflowFlag));
+            Assert.That(_context.GetPstateFlag(PState.CFlag), Is.EqualTo(_unicornEmu.CarryFlag));
+            Assert.That(_context.GetPstateFlag(PState.ZFlag), Is.EqualTo(_unicornEmu.ZeroFlag));
+            Assert.That(_context.GetPstateFlag(PState.NFlag), Is.EqualTo(_unicornEmu.NegativeFlag));
         }
 
         private void ManageFpSkips(FpSkips fpSkips)
         {
             if (fpSkips.HasFlag(FpSkips.IfNaNS))
             {
-                if (float.IsNaN(VectorExtractSingle(_unicornEmu.Q[0], (byte)0)))
+                if (float.IsNaN(_unicornEmu.Q[0].AsFloat()))
                 {
                     Assert.Ignore("NaN test.");
                 }
             }
             else if (fpSkips.HasFlag(FpSkips.IfNaND))
             {
-                if (double.IsNaN(VectorExtractDouble(_unicornEmu.Q[0], (byte)0)))
+                if (double.IsNaN(_unicornEmu.Q[0].AsDouble()))
                 {
                     Assert.Ignore("NaN test.");
                 }
@@ -398,158 +406,68 @@ namespace Ryujinx.Tests.Cpu
 
         private void ManageFpTolerances(FpTolerances fpTolerances)
         {
-            if (!Is.EqualTo(_unicornEmu.Q[0]).ApplyTo(_thread.ThreadState.V0).IsSuccess)
+            bool IsNormalOrSubnormalS(float f)  => float.IsNormal(f)  || float.IsSubnormal(f); // False for zero, infinity and NaN.
+            bool IsNormalOrSubnormalD(double d) => double.IsNormal(d) || double.IsSubnormal(d); // Double-precision counterpart.
+
+            if (!Is.EqualTo(_unicornEmu.Q[0]).ApplyTo(V128ToSimdValue(_context.GetV(0))).IsSuccess) // Tolerance only matters when V0 is not an exact match.
             {
                 if (fpTolerances == FpTolerances.UpToOneUlpsS)
                 {
-                    if (IsNormalOrSubnormalS(VectorExtractSingle(_unicornEmu.Q[0],       (byte)0)) &&
-                        IsNormalOrSubnormalS(VectorExtractSingle(_thread.ThreadState.V0, (byte)0)))
+                    if (IsNormalOrSubnormalS(_unicornEmu.Q[0].AsFloat()) &&
+                        IsNormalOrSubnormalS(_context.GetV(0).AsFloat())) // NOTE(review): AsFloat() presumably reads element 0 - confirm.
                     {
-                        Assert.That   (VectorExtractSingle(_thread.ThreadState.V0, (byte)0),
-                            Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0],       (byte)0)).Within(1).Ulps);
-                        Assert.That   (VectorExtractSingle(_thread.ThreadState.V0, (byte)1),
-                            Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0],       (byte)1)).Within(1).Ulps);
-                        Assert.That   (VectorExtractSingle(_thread.ThreadState.V0, (byte)2),
-                            Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0],       (byte)2)).Within(1).Ulps);
-                        Assert.That   (VectorExtractSingle(_thread.ThreadState.V0, (byte)3),
-                            Is.EqualTo(VectorExtractSingle(_unicornEmu.Q[0],       (byte)3)).Within(1).Ulps);
+                        Assert.That   (_context.GetV(0).GetFloat(0),
+                            Is.EqualTo(_unicornEmu.Q[0].GetFloat(0)).Within(1).Ulps); // Each of the four floats may differ by at most 1 ULP.
+                        Assert.That   (_context.GetV(0).GetFloat(1),
+                            Is.EqualTo(_unicornEmu.Q[0].GetFloat(1)).Within(1).Ulps);
+                        Assert.That   (_context.GetV(0).GetFloat(2),
+                            Is.EqualTo(_unicornEmu.Q[0].GetFloat(2)).Within(1).Ulps);
+                        Assert.That   (_context.GetV(0).GetFloat(3),
+                            Is.EqualTo(_unicornEmu.Q[0].GetFloat(3)).Within(1).Ulps);
 
                         Console.WriteLine(fpTolerances);
                     }
                     else
                     {
-                        Assert.That(_thread.ThreadState.V0, Is.EqualTo(_unicornEmu.Q[0]));
+                        Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0])); // Values failing the predicate must match exactly.
                     }
                 }
 
                 if (fpTolerances == FpTolerances.UpToOneUlpsD)
                 {
-                    if (IsNormalOrSubnormalD(VectorExtractDouble(_unicornEmu.Q[0],       (byte)0)) &&
-                        IsNormalOrSubnormalD(VectorExtractDouble(_thread.ThreadState.V0, (byte)0)))
+                    if (IsNormalOrSubnormalD(_unicornEmu.Q[0].AsDouble()) &&
+                        IsNormalOrSubnormalD(_context.GetV(0).AsDouble())) // NOTE(review): AsDouble() presumably reads element 0 - confirm.
                     {
-                        Assert.That   (VectorExtractDouble(_thread.ThreadState.V0, (byte)0),
-                            Is.EqualTo(VectorExtractDouble(_unicornEmu.Q[0],       (byte)0)).Within(1).Ulps);
-                        Assert.That   (VectorExtractDouble(_thread.ThreadState.V0, (byte)1),
-                            Is.EqualTo(VectorExtractDouble(_unicornEmu.Q[0],       (byte)1)).Within(1).Ulps);
+                        Assert.That   (_context.GetV(0).GetDouble(0),
+                            Is.EqualTo(_unicornEmu.Q[0].GetDouble(0)).Within(1).Ulps); // Both doubles may differ by at most 1 ULP.
+                        Assert.That   (_context.GetV(0).GetDouble(1),
+                            Is.EqualTo(_unicornEmu.Q[0].GetDouble(1)).Within(1).Ulps);
 
                         Console.WriteLine(fpTolerances);
                     }
                     else
                     {
-                        Assert.That(_thread.ThreadState.V0, Is.EqualTo(_unicornEmu.Q[0]));
+                        Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0])); // Values failing the predicate must match exactly.
                     }
                 }
             }
-
-            bool IsNormalOrSubnormalS(float f)  => float.IsNormal(f)  || float.IsSubnormal(f);
-
-            bool IsNormalOrSubnormalD(double d) => double.IsNormal(d) || double.IsSubnormal(d);
         }
 
-        protected static Vector128<float> MakeVectorE0(double e0)
+        private static SimdValue V128ToSimdValue(V128 value) // Converts a V128 into the SimdValue type stored in _unicornEmu.Q.
         {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            return Sse.StaticCast<long, float>(Sse2.SetVector128(0, BitConverter.DoubleToInt64Bits(e0)));
+            return new SimdValue(value.GetUInt64(0), value.GetUInt64(1)); // Passes along both 64-bit elements: index 0, then index 1.
         }
 
-        protected static Vector128<float> MakeVectorE0E1(double e0, double e1)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
+        protected static V128 MakeVectorScalar(float value)  => new V128(value); // Wraps one float; the remaining bits are whatever V128(float) defines.
+        protected static V128 MakeVectorScalar(double value) => new V128(value); // Wraps one double; the remaining bits are whatever V128(double) defines.
 
-            return Sse.StaticCast<long, float>(
-                Sse2.SetVector128(BitConverter.DoubleToInt64Bits(e1), BitConverter.DoubleToInt64Bits(e0)));
-        }
+        protected static V128 MakeVectorE0(ulong e0) => new V128(e0, 0); // presumably element 0 = e0, element 1 = 0 - confirm V128(ulong, ulong) argument order.
+        protected static V128 MakeVectorE1(ulong e1) => new V128(0, e1); // presumably element 0 = 0, element 1 = e1 - confirm V128(ulong, ulong) argument order.
 
-        protected static Vector128<float> MakeVectorE1(double e1)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
+        protected static V128 MakeVectorE0E1(ulong e0, ulong e1) => new V128(e0, e1); // Builds a V128 from two 64-bit values, passed through in the same order.
 
-            return Sse.StaticCast<long, float>(Sse2.SetVector128(BitConverter.DoubleToInt64Bits(e1), 0));
-        }
-
-        protected static float VectorExtractSingle(Vector128<float> vector, byte index)
-        {
-            if (!Sse41.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            int value = Sse41.Extract(Sse.StaticCast<float, int>(vector), index);
-
-            return BitConverter.Int32BitsToSingle(value);
-        }
-
-        protected static double VectorExtractDouble(Vector128<float> vector, byte index)
-        {
-            if (!Sse41.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            long value = Sse41.Extract(Sse.StaticCast<float, long>(vector), index);
-
-            return BitConverter.Int64BitsToDouble(value);
-        }
-
-        protected static Vector128<float> MakeVectorE0(ulong e0)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            return Sse.StaticCast<ulong, float>(Sse2.SetVector128(0, e0));
-        }
-
-        protected static Vector128<float> MakeVectorE0E1(ulong e0, ulong e1)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            return Sse.StaticCast<ulong, float>(Sse2.SetVector128(e1, e0));
-        }
-
-        protected static Vector128<float> MakeVectorE1(ulong e1)
-        {
-            if (!Sse2.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            return Sse.StaticCast<ulong, float>(Sse2.SetVector128(e1, 0));
-        }
-
-        protected static ulong GetVectorE0(Vector128<float> vector)
-        {
-            if (!Sse41.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            return Sse41.Extract(Sse.StaticCast<float, ulong>(vector), (byte)0);
-        }
-
-        protected static ulong GetVectorE1(Vector128<float> vector)
-        {
-            if (!Sse41.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            return Sse41.Extract(Sse.StaticCast<float, ulong>(vector), (byte)1);
-        }
+        protected static ulong GetVectorE0(V128 vector) => vector.GetUInt64(0); // 64-bit element at index 0.
+        protected static ulong GetVectorE1(V128 vector) => vector.GetUInt64(1); // 64-bit element at index 1.
 
         protected static ushort GenNormalH()
         {
diff --git a/Ryujinx.Tests/Cpu/CpuTestAluBinary.cs b/Ryujinx.Tests/Cpu/CpuTestAluBinary.cs
new file mode 100644
index 000000000..2823477fc
--- /dev/null
+++ b/Ryujinx.Tests/Cpu/CpuTestAluBinary.cs
@@ -0,0 +1,238 @@
+#define AluBinary
+
+using NUnit.Framework;
+
+namespace Ryujinx.Tests.Cpu
+{
+    [Category("AluBinary")]
+    public sealed class CpuTestAluBinary : CpuTest
+    {
+#if AluBinary
+        private const int RndCnt = 2;
+
+        [Test, Pairwise, Description("CRC32X <Wd>, <Wn>, <Xm>"), Ignore("Unicorn fails.")]
+        public void Crc32x([Values(0u, 31u)] uint rd,
+                           [Values(1u, 31u)] uint rn,
+                           [Values(2u, 31u)] uint rm,
+                           [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
+                           [Values((ulong)0x00_00_00_00_00_00_00_00,
+                                   (ulong)0x7F_FF_FF_FF_FF_FF_FF_FF,
+                                   (ulong)0x80_00_00_00_00_00_00_00,
+                                   (ulong)0xFF_FF_FF_FF_FF_FF_FF_FF)] [Random(RndCnt)] ulong xm)
+        {
+            uint opcode = 0x9AC04C00; // CRC32X W0, W0, X0
+            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
+
+            uint w31 = TestContext.CurrentContext.Random.NextUInt();
+
+            SingleOpcode(opcode, x1: wn, x2: xm, x31: w31);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("CRC32W <Wd>, <Wn>, <Wm>"), Ignore("Unicorn fails.")]
+        public void Crc32w([Values(0u, 31u)] uint rd,
+                           [Values(1u, 31u)] uint rn,
+                           [Values(2u, 31u)] uint rm,
+                           [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
+                           [Values((uint)0x00_00_00_00, (uint)0x7F_FF_FF_FF,
+                                   (uint)0x80_00_00_00, (uint)0xFF_FF_FF_FF)] [Random(RndCnt)] uint wm)
+        {
+            uint opcode = 0x1AC04800; // CRC32W W0, W0, W0
+            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
+
+            uint w31 = TestContext.CurrentContext.Random.NextUInt();
+
+            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("CRC32H <Wd>, <Wn>, <Wm>"), Ignore("Unicorn fails.")]
+        public void Crc32h([Values(0u, 31u)] uint rd,
+                           [Values(1u, 31u)] uint rn,
+                           [Values(2u, 31u)] uint rm,
+                           [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
+                           [Values((ushort)0x00_00, (ushort)0x7F_FF,
+                                   (ushort)0x80_00, (ushort)0xFF_FF)] [Random(RndCnt)] ushort wm)
+        {
+            uint opcode = 0x1AC04400; // CRC32H W0, W0, W0
+            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
+
+            uint w31 = TestContext.CurrentContext.Random.NextUInt();
+
+            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("CRC32B <Wd>, <Wn>, <Wm>"), Ignore("Unicorn fails.")]
+        public void Crc32b([Values(0u, 31u)] uint rd,
+                           [Values(1u, 31u)] uint rn,
+                           [Values(2u, 31u)] uint rm,
+                           [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
+                           [Values((byte)0x00, (byte)0x7F,
+                                   (byte)0x80, (byte)0xFF)] [Random(RndCnt)] byte wm)
+        {
+            uint opcode = 0x1AC04000; // CRC32B W0, W0, W0
+            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
+
+            uint w31 = TestContext.CurrentContext.Random.NextUInt();
+
+            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("CRC32CX <Wd>, <Wn>, <Xm>")]
+        public void Crc32cx([Values(0u, 31u)] uint rd,
+                            [Values(1u, 31u)] uint rn,
+                            [Values(2u, 31u)] uint rm,
+                            [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
+                            [Values((ulong)0x00_00_00_00_00_00_00_00,
+                                    (ulong)0x7F_FF_FF_FF_FF_FF_FF_FF,
+                                    (ulong)0x80_00_00_00_00_00_00_00,
+                                    (ulong)0xFF_FF_FF_FF_FF_FF_FF_FF)] [Random(RndCnt)] ulong xm)
+        {
+            uint opcode = 0x9AC05C00; // CRC32CX W0, W0, X0
+            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
+
+            uint w31 = TestContext.CurrentContext.Random.NextUInt();
+
+            SingleOpcode(opcode, x1: wn, x2: xm, x31: w31);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("CRC32CW <Wd>, <Wn>, <Wm>")]
+        public void Crc32cw([Values(0u, 31u)] uint rd,
+                            [Values(1u, 31u)] uint rn,
+                            [Values(2u, 31u)] uint rm,
+                            [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
+                            [Values((uint)0x00_00_00_00, (uint)0x7F_FF_FF_FF,
+                                    (uint)0x80_00_00_00, (uint)0xFF_FF_FF_FF)] [Random(RndCnt)] uint wm)
+        {
+            uint opcode = 0x1AC05800; // CRC32CW W0, W0, W0
+            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
+
+            uint w31 = TestContext.CurrentContext.Random.NextUInt();
+
+            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("CRC32CH <Wd>, <Wn>, <Wm>")]
+        public void Crc32ch([Values(0u, 31u)] uint rd,
+                            [Values(1u, 31u)] uint rn,
+                            [Values(2u, 31u)] uint rm,
+                            [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
+                            [Values((ushort)0x00_00, (ushort)0x7F_FF,
+                                    (ushort)0x80_00, (ushort)0xFF_FF)] [Random(RndCnt)] ushort wm)
+        {
+            uint opcode = 0x1AC05400; // CRC32CH W0, W0, W0
+            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
+
+            uint w31 = TestContext.CurrentContext.Random.NextUInt();
+
+            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("CRC32CB <Wd>, <Wn>, <Wm>")]
+        public void Crc32cb([Values(0u, 31u)] uint rd,
+                            [Values(1u, 31u)] uint rn,
+                            [Values(2u, 31u)] uint rm,
+                            [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
+                            [Values((byte)0x00, (byte)0x7F,
+                                    (byte)0x80, (byte)0xFF)] [Random(RndCnt)] byte wm)
+        {
+            uint opcode = 0x1AC05000; // CRC32CB W0, W0, W0
+            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
+
+            uint w31 = TestContext.CurrentContext.Random.NextUInt();
+
+            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("SDIV <Xd>, <Xn>, <Xm>")]
+        public void Sdiv_64bit([Values(0u, 31u)] uint rd,
+                               [Values(1u, 31u)] uint rn,
+                               [Values(2u, 31u)] uint rm,
+                               [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul,
+                                       0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xn,
+                               [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul,
+                                       0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xm)
+        {
+            uint opcode = 0x9AC00C00; // SDIV X0, X0, X0
+            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
+
+            ulong x31 = TestContext.CurrentContext.Random.NextULong();
+
+            SingleOpcode(opcode, x1: xn, x2: xm, x31: x31);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("SDIV <Wd>, <Wn>, <Wm>")]
+        public void Sdiv_32bit([Values(0u, 31u)] uint rd,
+                               [Values(1u, 31u)] uint rn,
+                               [Values(2u, 31u)] uint rm,
+                               [Values(0x00000000u, 0x7FFFFFFFu,
+                                       0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
+                               [Values(0x00000000u, 0x7FFFFFFFu,
+                                       0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm)
+        {
+            uint opcode = 0x1AC00C00; // SDIV W0, W0, W0
+            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
+
+            uint w31 = TestContext.CurrentContext.Random.NextUInt();
+
+            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("UDIV <Xd>, <Xn>, <Xm>")]
+        public void Udiv_64bit([Values(0u, 31u)] uint rd,
+                               [Values(1u, 31u)] uint rn,
+                               [Values(2u, 31u)] uint rm,
+                               [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul,
+                                       0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xn,
+                               [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul,
+                                       0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xm)
+        {
+            uint opcode = 0x9AC00800; // UDIV X0, X0, X0
+            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
+
+            ulong x31 = TestContext.CurrentContext.Random.NextULong();
+
+            SingleOpcode(opcode, x1: xn, x2: xm, x31: x31);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("UDIV <Wd>, <Wn>, <Wm>")]
+        public void Udiv_32bit([Values(0u, 31u)] uint rd,
+                               [Values(1u, 31u)] uint rn,
+                               [Values(2u, 31u)] uint rm,
+                               [Values(0x00000000u, 0x7FFFFFFFu,
+                                       0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
+                               [Values(0x00000000u, 0x7FFFFFFFu,
+                                       0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm)
+        {
+            uint opcode = 0x1AC00800; // UDIV W0, W0, W0
+            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
+
+            uint w31 = TestContext.CurrentContext.Random.NextUInt();
+
+            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
+
+            CompareAgainstUnicorn();
+        }
+#endif
+    }
+}
diff --git a/Ryujinx.Tests/Cpu/CpuTestAluRs.cs b/Ryujinx.Tests/Cpu/CpuTestAluRs.cs
index 2d4013e2f..418dd56d2 100644
--- a/Ryujinx.Tests/Cpu/CpuTestAluRs.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestAluRs.cs
@@ -394,154 +394,6 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("CRC32X <Wd>, <Wn>, <Xm>"), Ignore("Unicorn fails.")]
-        public void Crc32x([Values(0u, 31u)] uint rd,
-                           [Values(1u, 31u)] uint rn,
-                           [Values(2u, 31u)] uint rm,
-                           [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
-                           [Values((ulong)0x00_00_00_00_00_00_00_00,
-                                   (ulong)0x7F_FF_FF_FF_FF_FF_FF_FF,
-                                   (ulong)0x80_00_00_00_00_00_00_00,
-                                   (ulong)0xFF_FF_FF_FF_FF_FF_FF_FF)] [Random(RndCnt)] ulong xm)
-        {
-            uint opcode = 0x9AC04C00; // CRC32X W0, W0, X0
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-
-            uint w31 = TestContext.CurrentContext.Random.NextUInt();
-
-            SingleOpcode(opcode, x1: wn, x2: xm, x31: w31);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("CRC32W <Wd>, <Wn>, <Wm>"), Ignore("Unicorn fails.")]
-        public void Crc32w([Values(0u, 31u)] uint rd,
-                           [Values(1u, 31u)] uint rn,
-                           [Values(2u, 31u)] uint rm,
-                           [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
-                           [Values((uint)0x00_00_00_00, (uint)0x7F_FF_FF_FF,
-                                   (uint)0x80_00_00_00, (uint)0xFF_FF_FF_FF)] [Random(RndCnt)] uint wm)
-        {
-            uint opcode = 0x1AC04800; // CRC32W W0, W0, W0
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-
-            uint w31 = TestContext.CurrentContext.Random.NextUInt();
-
-            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("CRC32H <Wd>, <Wn>, <Wm>"), Ignore("Unicorn fails.")]
-        public void Crc32h([Values(0u, 31u)] uint rd,
-                           [Values(1u, 31u)] uint rn,
-                           [Values(2u, 31u)] uint rm,
-                           [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
-                           [Values((ushort)0x00_00, (ushort)0x7F_FF,
-                                   (ushort)0x80_00, (ushort)0xFF_FF)] [Random(RndCnt)] ushort wm)
-        {
-            uint opcode = 0x1AC04400; // CRC32H W0, W0, W0
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-
-            uint w31 = TestContext.CurrentContext.Random.NextUInt();
-
-            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("CRC32B <Wd>, <Wn>, <Wm>"), Ignore("Unicorn fails.")]
-        public void Crc32b([Values(0u, 31u)] uint rd,
-                           [Values(1u, 31u)] uint rn,
-                           [Values(2u, 31u)] uint rm,
-                           [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
-                           [Values((byte)0x00, (byte)0x7F,
-                                   (byte)0x80, (byte)0xFF)] [Random(RndCnt)] byte wm)
-        {
-            uint opcode = 0x1AC04000; // CRC32B W0, W0, W0
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-
-            uint w31 = TestContext.CurrentContext.Random.NextUInt();
-
-            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("CRC32CX <Wd>, <Wn>, <Xm>")]
-        public void Crc32cx([Values(0u, 31u)] uint rd,
-                            [Values(1u, 31u)] uint rn,
-                            [Values(2u, 31u)] uint rm,
-                            [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
-                            [Values((ulong)0x00_00_00_00_00_00_00_00,
-                                    (ulong)0x7F_FF_FF_FF_FF_FF_FF_FF,
-                                    (ulong)0x80_00_00_00_00_00_00_00,
-                                    (ulong)0xFF_FF_FF_FF_FF_FF_FF_FF)] [Random(RndCnt)] ulong xm)
-        {
-            uint opcode = 0x9AC05C00; // CRC32CX W0, W0, X0
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-
-            uint w31 = TestContext.CurrentContext.Random.NextUInt();
-
-            SingleOpcode(opcode, x1: wn, x2: xm, x31: w31);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("CRC32CW <Wd>, <Wn>, <Wm>")]
-        public void Crc32cw([Values(0u, 31u)] uint rd,
-                            [Values(1u, 31u)] uint rn,
-                            [Values(2u, 31u)] uint rm,
-                            [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
-                            [Values((uint)0x00_00_00_00, (uint)0x7F_FF_FF_FF,
-                                    (uint)0x80_00_00_00, (uint)0xFF_FF_FF_FF)] [Random(RndCnt)] uint wm)
-        {
-            uint opcode = 0x1AC05800; // CRC32CW W0, W0, W0
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-
-            uint w31 = TestContext.CurrentContext.Random.NextUInt();
-
-            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("CRC32CH <Wd>, <Wn>, <Wm>")]
-        public void Crc32ch([Values(0u, 31u)] uint rd,
-                            [Values(1u, 31u)] uint rn,
-                            [Values(2u, 31u)] uint rm,
-                            [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
-                            [Values((ushort)0x00_00, (ushort)0x7F_FF,
-                                    (ushort)0x80_00, (ushort)0xFF_FF)] [Random(RndCnt)] ushort wm)
-        {
-            uint opcode = 0x1AC05400; // CRC32CH W0, W0, W0
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-
-            uint w31 = TestContext.CurrentContext.Random.NextUInt();
-
-            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("CRC32CB <Wd>, <Wn>, <Wm>")]
-        public void Crc32cb([Values(0u, 31u)] uint rd,
-                            [Values(1u, 31u)] uint rn,
-                            [Values(2u, 31u)] uint rm,
-                            [Values(0x00000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
-                            [Values((byte)0x00, (byte)0x7F,
-                                    (byte)0x80, (byte)0xFF)] [Random(RndCnt)] byte wm)
-        {
-            uint opcode = 0x1AC05000; // CRC32CB W0, W0, W0
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-
-            uint w31 = TestContext.CurrentContext.Random.NextUInt();
-
-            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
-
-            CompareAgainstUnicorn();
-        }
-
         [Test, Pairwise, Description("EON <Xd>, <Xn>, <Xm>{, <shift> #<amount>}")]
         public void Eon_64bit([Values(0u, 31u)] uint rd,
                               [Values(1u, 31u)] uint rn,
@@ -954,44 +806,6 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
-        [Test, Pairwise, Description("SDIV <Xd>, <Xn>, <Xm>")]
-        public void Sdiv_64bit([Values(0u, 31u)] uint rd,
-                               [Values(1u, 31u)] uint rn,
-                               [Values(2u, 31u)] uint rm,
-                               [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul,
-                                       0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xn,
-                               [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul,
-                                       0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xm)
-        {
-            uint opcode = 0x9AC00C00; // SDIV X0, X0, X0
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-
-            ulong x31 = TestContext.CurrentContext.Random.NextULong();
-
-            SingleOpcode(opcode, x1: xn, x2: xm, x31: x31);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("SDIV <Wd>, <Wn>, <Wm>")]
-        public void Sdiv_32bit([Values(0u, 31u)] uint rd,
-                               [Values(1u, 31u)] uint rn,
-                               [Values(2u, 31u)] uint rm,
-                               [Values(0x00000000u, 0x7FFFFFFFu,
-                                       0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
-                               [Values(0x00000000u, 0x7FFFFFFFu,
-                                       0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm)
-        {
-            uint opcode = 0x1AC00C00; // SDIV W0, W0, W0
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-
-            uint w31 = TestContext.CurrentContext.Random.NextUInt();
-
-            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
-
-            CompareAgainstUnicorn();
-        }
-
         [Test, Pairwise, Description("SUB <Xd>, <Xn>, <Xm>{, <shift> #<amount>}")]
         public void Sub_64bit([Values(0u, 31u)] uint rd,
                               [Values(1u, 31u)] uint rn,
@@ -1079,44 +893,6 @@ namespace Ryujinx.Tests.Cpu
 
             CompareAgainstUnicorn();
         }
-
-        [Test, Pairwise, Description("UDIV <Xd>, <Xn>, <Xm>")]
-        public void Udiv_64bit([Values(0u, 31u)] uint rd,
-                               [Values(1u, 31u)] uint rn,
-                               [Values(2u, 31u)] uint rm,
-                               [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul,
-                                       0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xn,
-                               [Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul,
-                                       0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul)] [Random(RndCnt)] ulong xm)
-        {
-            uint opcode = 0x9AC00800; // UDIV X0, X0, X0
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-
-            ulong x31 = TestContext.CurrentContext.Random.NextULong();
-
-            SingleOpcode(opcode, x1: xn, x2: xm, x31: x31);
-
-            CompareAgainstUnicorn();
-        }
-
-        [Test, Pairwise, Description("UDIV <Wd>, <Wn>, <Wm>")]
-        public void Udiv_32bit([Values(0u, 31u)] uint rd,
-                               [Values(1u, 31u)] uint rn,
-                               [Values(2u, 31u)] uint rm,
-                               [Values(0x00000000u, 0x7FFFFFFFu,
-                                       0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn,
-                               [Values(0x00000000u, 0x7FFFFFFFu,
-                                       0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm)
-        {
-            uint opcode = 0x1AC00800; // UDIV W0, W0, W0
-            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
-
-            uint w31 = TestContext.CurrentContext.Random.NextUInt();
-
-            SingleOpcode(opcode, x1: wn, x2: wm, x31: w31);
-
-            CompareAgainstUnicorn();
-        }
 #endif
     }
 }
diff --git a/Ryujinx.Tests/Cpu/CpuTestMisc.cs b/Ryujinx.Tests/Cpu/CpuTestMisc.cs
index e976c2c00..6d2440c18 100644
--- a/Ryujinx.Tests/Cpu/CpuTestMisc.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestMisc.cs
@@ -1,11 +1,9 @@
 #define Misc
 
-using ChocolArm64.State;
+using ARMeilleure.State;
 
 using NUnit.Framework;
 
-using System.Runtime.Intrinsics.X86;
-
 namespace Ryujinx.Tests.Cpu
 {
     [Category("Misc")]
@@ -32,10 +30,9 @@ namespace Ryujinx.Tests.Cpu
             opCmn  |= ((shift & 3) << 22) | ((imm & 4095) << 10);
             opCset |= ((cond & 15) << 12);
 
-            SetThreadState(x0: xn);
+            SetContext(x0: xn);
             Opcode(opCmn);
             Opcode(opCset);
-            Opcode(0xD4200000); // BRK #0
             Opcode(0xD65F03C0); // RET
             ExecuteOpcodes();
 
@@ -58,10 +55,9 @@ namespace Ryujinx.Tests.Cpu
             opCmn  |= ((shift & 3) << 22) | ((imm & 4095) << 10);
             opCset |= ((cond & 15) << 12);
 
-            SetThreadState(x0: wn);
+            SetContext(x0: wn);
             Opcode(opCmn);
             Opcode(opCset);
-            Opcode(0xD4200000); // BRK #0
             Opcode(0xD65F03C0); // RET
             ExecuteOpcodes();
 
@@ -84,10 +80,9 @@ namespace Ryujinx.Tests.Cpu
             opCmp  |= ((shift & 3) << 22) | ((imm & 4095) << 10);
             opCset |= ((cond & 15) << 12);
 
-            SetThreadState(x0: xn);
+            SetContext(x0: xn);
             Opcode(opCmp);
             Opcode(opCset);
-            Opcode(0xD4200000); // BRK #0
             Opcode(0xD65F03C0); // RET
             ExecuteOpcodes();
 
@@ -110,10 +105,9 @@ namespace Ryujinx.Tests.Cpu
             opCmp  |= ((shift & 3) << 22) | ((imm & 4095) << 10);
             opCset |= ((cond & 15) << 12);
 
-            SetThreadState(x0: wn);
+            SetContext(x0: wn);
             Opcode(opCmp);
             Opcode(opCset);
-            Opcode(0xD4200000); // BRK #0
             Opcode(0xD65F03C0); // RET
             ExecuteOpcodes();
 
@@ -136,11 +130,10 @@ namespace Ryujinx.Tests.Cpu
             SUB W0, W0, #3
             MUL W0, W1, W0
             SDIV W0, W2, W0
-            BRK #0
             RET
             */
 
-            SetThreadState(x0: a);
+            SetContext(x0: a);
             Opcode(0x11000C02);
             Opcode(0x51001401);
             Opcode(0x1B017C42);
@@ -148,11 +141,10 @@ namespace Ryujinx.Tests.Cpu
             Opcode(0x51000C00);
             Opcode(0x1B007C20);
             Opcode(0x1AC00C40);
-            Opcode(0xD4200000);
             Opcode(0xD65F03C0);
             ExecuteOpcodes();
 
-            Assert.That(GetThreadState().X0, Is.Zero);
+            Assert.That(GetContext().GetX(0), Is.Zero);
         }
 
         [Explicit]
@@ -185,24 +177,20 @@ namespace Ryujinx.Tests.Cpu
             FADD S0, S0, S1
             FDIV S0, S2, S0
             FMUL S0, S0, S0
-            BRK #0
             RET
             */
 
-            SetThreadState(
-                v0: Sse.SetScalarVector128(a),
-                v1: Sse.SetScalarVector128(b));
+            SetContext(v0: MakeVectorScalar(a), v1: MakeVectorScalar(b));
             Opcode(0x1E2E1002);
             Opcode(0x1E201840);
             Opcode(0x1E211841);
             Opcode(0x1E212800);
             Opcode(0x1E201840);
             Opcode(0x1E200800);
-            Opcode(0xD4200000);
             Opcode(0xD65F03C0);
             ExecuteOpcodes();
 
-            Assert.That(Sse41.Extract(GetThreadState().V0, (byte)0), Is.EqualTo(16f));
+            Assert.That(GetContext().GetV(0).AsFloat(), Is.EqualTo(16f));
         }
 
         [Explicit]
@@ -235,24 +223,20 @@ namespace Ryujinx.Tests.Cpu
             FADD D0, D0, D1
             FDIV D0, D2, D0
             FMUL D0, D0, D0
-            BRK #0
             RET
             */
 
-            SetThreadState(
-                v0: Sse.StaticCast<double, float>(Sse2.SetScalarVector128(a)),
-                v1: Sse.StaticCast<double, float>(Sse2.SetScalarVector128(b)));
+            SetContext(v0: MakeVectorScalar(a), v1: MakeVectorScalar(b));
             Opcode(0x1E6E1002);
             Opcode(0x1E601840);
             Opcode(0x1E611841);
             Opcode(0x1E612800);
             Opcode(0x1E601840);
             Opcode(0x1E600800);
-            Opcode(0xD4200000);
             Opcode(0xD65F03C0);
             ExecuteOpcodes();
 
-            Assert.That(VectorExtractDouble(GetThreadState().V0, (byte)0), Is.EqualTo(16d));
+            Assert.That(GetContext().GetV(0).AsDouble(), Is.EqualTo(16d));
         }
 
         [Test, Ignore("The Tester supports only one return point.")]
@@ -279,9 +263,9 @@ namespace Ryujinx.Tests.Cpu
 
             /*
             0x0000000000001000: MOV W4, W0
-            0x0000000000001004: CBZ W0, #0x3C
+            0x0000000000001004: CBZ W0, #0x34
             0x0000000000001008: CMP W0, #1
-            0x000000000000100C: B.LS #0x48
+            0x000000000000100C: B.LS #0x34
             0x0000000000001010: MOVZ W2, #0x2
             0x0000000000001014: MOVZ X1, #0x1
             0x0000000000001018: MOVZ X3, #0
@@ -290,22 +274,19 @@ namespace Ryujinx.Tests.Cpu
             0x0000000000001024: MOV X3, X1
             0x0000000000001028: MOV X1, X0
             0x000000000000102C: CMP W4, W2
-            0x0000000000001030: B.HS #0x1C
-            0x0000000000001034: BRK #0
-            0x0000000000001038: RET
-            0x000000000000103C: MOVZ X0, #0
-            0x0000000000001040: BRK #0
+            0x0000000000001030: B.HS #-0x14
+            0x0000000000001034: RET
+            0x0000000000001038: MOVZ X0, #0
+            0x000000000000103C: RET
+            0x0000000000001040: MOVZ X0, #0x1
             0x0000000000001044: RET
-            0x0000000000001048: MOVZ X0, #0x1
-            0x000000000000104C: BRK #0
-            0x0000000000001050: RET
             */
 
-            SetThreadState(x0: a);
+            SetContext(x0: a);
             Opcode(0x2A0003E4);
-            Opcode(0x340001C0);
+            Opcode(0x340001A0);
             Opcode(0x7100041F);
-            Opcode(0x540001E9);
+            Opcode(0x540001A9);
             Opcode(0x52800042);
             Opcode(0xD2800021);
             Opcode(0xD2800003);
@@ -315,17 +296,14 @@ namespace Ryujinx.Tests.Cpu
             Opcode(0xAA0003E1);
             Opcode(0x6B02009F);
             Opcode(0x54FFFF62);
-            Opcode(0xD4200000);
             Opcode(0xD65F03C0);
             Opcode(0xD2800000);
-            Opcode(0xD4200000);
             Opcode(0xD65F03C0);
             Opcode(0xD2800020);
-            Opcode(0xD4200000);
             Opcode(0xD65F03C0);
             ExecuteOpcodes();
 
-            Assert.That(GetThreadState().X0, Is.EqualTo(Fn(a)));
+            Assert.That(GetContext().GetX(0), Is.EqualTo(Fn(a)));
         }
 
         [Explicit]
@@ -338,18 +316,16 @@ namespace Ryujinx.Tests.Cpu
             0x0000000000001000: MOV X0, #2
             0x0000000000001004: MOV X1, #3
             0x0000000000001008: ADD X0, X0, X1
-            0x000000000000100C: BRK #0
-            0x0000000000001010: RET
+            0x000000000000100C: RET
             */
 
             Opcode(0xD2800040);
             Opcode(0xD2800061);
             Opcode(0x8B010000);
-            Opcode(0xD4200000);
             Opcode(0xD65F03C0);
             ExecuteOpcodes();
 
-            Assert.That(GetThreadState().X0, Is.EqualTo(result));
+            Assert.That(GetContext().GetX(0), Is.EqualTo(result));
 
             Reset();
 
@@ -357,18 +333,16 @@ namespace Ryujinx.Tests.Cpu
             0x0000000000001000: MOV X0, #3
             0x0000000000001004: MOV X1, #2
             0x0000000000001008: ADD X0, X0, X1
-            0x000000000000100C: BRK #0
-            0x0000000000001010: RET
+            0x000000000000100C: RET
             */
 
             Opcode(0xD2800060);
             Opcode(0xD2800041);
             Opcode(0x8B010000);
-            Opcode(0xD4200000);
             Opcode(0xD65F03C0);
             ExecuteOpcodes();
 
-            Assert.That(GetThreadState().X0, Is.EqualTo(result));
+            Assert.That(GetContext().GetX(0), Is.EqualTo(result));
         }
 
         [Explicit]
@@ -379,9 +353,9 @@ namespace Ryujinx.Tests.Cpu
         public void SanityCheck(ulong a)
         {
             uint opcode = 0xD503201F; // NOP
-            CpuThreadState threadState = SingleOpcode(opcode, x0: a);
+            ExecutionContext context = SingleOpcode(opcode, x0: a);
 
-            Assert.That(threadState.X0, Is.EqualTo(a));
+            Assert.That(context.GetX(0), Is.EqualTo(a));
         }
 #endif
     }
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
index b446d953e..30dec59ac 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
@@ -1,10 +1,11 @@
 #define Simd
 
+using ARMeilleure.State;
+
 using NUnit.Framework;
 
 using System;
 using System.Collections.Generic;
-using System.Runtime.Intrinsics;
 
 namespace Ryujinx.Tests.Cpu
 {
@@ -1175,8 +1176,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x5EE0B800; // ABS D0, D0
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1194,8 +1195,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1213,8 +1214,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1230,8 +1231,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x5EF1B800; // ADDP D0, V0.2D
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1249,8 +1250,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -1268,8 +1269,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -1287,8 +1288,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1306,8 +1307,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1325,8 +1326,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1344,8 +1345,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1363,8 +1364,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1382,8 +1383,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1399,8 +1400,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x5EE09800; // CMEQ D0, D0, #0
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1418,8 +1419,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1437,8 +1438,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1454,8 +1455,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x7EE08800; // CMGE D0, D0, #0
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1473,8 +1474,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1492,8 +1493,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1509,8 +1510,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x5EE08800; // CMGT D0, D0, #0
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1528,8 +1529,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1547,8 +1548,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1564,8 +1565,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x7EE09800; // CMLE D0, D0, #0
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1583,8 +1584,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1602,8 +1603,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1619,8 +1620,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x5EE0A800; // CMLT D0, D0, #0
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1638,8 +1639,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1657,8 +1658,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1674,8 +1675,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x0E205800; // CNT V0.8B, V0.8B
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1691,8 +1692,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x4E205800; // CNT V0.16B, V0.16B
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -1704,8 +1705,8 @@ namespace Ryujinx.Tests.Cpu
                                              [ValueSource("_1S_F_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1722,8 +1723,8 @@ namespace Ryujinx.Tests.Cpu
                                              [ValueSource("_1D_F_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1746,8 +1747,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1768,8 +1769,8 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1786,8 +1787,8 @@ namespace Ryujinx.Tests.Cpu
                                   [ValueSource("_2S_F_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1804,8 +1805,8 @@ namespace Ryujinx.Tests.Cpu
                                   [ValueSource("_1D_F_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1822,8 +1823,8 @@ namespace Ryujinx.Tests.Cpu
                                         [ValueSource("_1S_F_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1839,8 +1840,8 @@ namespace Ryujinx.Tests.Cpu
                                         [ValueSource("_1D_F_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1862,8 +1863,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1883,8 +1884,8 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1899,7 +1900,7 @@ namespace Ryujinx.Tests.Cpu
         public void F_Cmp_Cmpe_S_S([ValueSource("_F_Cmp_Cmpe_S_S_")] uint opcodes,
                                    [ValueSource("_1S_F_")] ulong a)
         {
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v1 = MakeVectorE0(a);
 
             bool v = TestContext.CurrentContext.Random.NextBool();
             bool c = TestContext.CurrentContext.Random.NextBool();
@@ -1915,7 +1916,7 @@ namespace Ryujinx.Tests.Cpu
         public void F_Cmp_Cmpe_S_D([ValueSource("_F_Cmp_Cmpe_S_D_")] uint opcodes,
                                    [ValueSource("_1D_F_")] ulong a)
         {
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v1 = MakeVectorE0(a);
 
             bool v = TestContext.CurrentContext.Random.NextBool();
             bool c = TestContext.CurrentContext.Random.NextBool();
@@ -1932,8 +1933,8 @@ namespace Ryujinx.Tests.Cpu
                                [ValueSource("_1S_F_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -1945,8 +1946,8 @@ namespace Ryujinx.Tests.Cpu
                                [ValueSource("_1D_F_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -1958,8 +1959,8 @@ namespace Ryujinx.Tests.Cpu
                                [ValueSource("_1S_F_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -1971,8 +1972,8 @@ namespace Ryujinx.Tests.Cpu
                                [ValueSource("_1H_F_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -1984,8 +1985,8 @@ namespace Ryujinx.Tests.Cpu
                                     [ValueSource("_1S_F_W_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -1997,8 +1998,8 @@ namespace Ryujinx.Tests.Cpu
                                     [ValueSource("_1D_F_X_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2016,8 +2017,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2033,8 +2034,8 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2053,8 +2054,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -2079,8 +2080,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2099,8 +2100,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -2125,8 +2126,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2143,7 +2144,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32;
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, x0: x0, x31: w31, v1: v1);
 
@@ -2159,7 +2160,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, x31: x31, v1: v1);
 
@@ -2175,7 +2176,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v1 = MakeVectorE1(a);
+            V128 v1 = MakeVectorE1(a);
 
             SingleOpcode(opcodes, x31: x31, v1: v1);
 
@@ -2192,7 +2193,7 @@ namespace Ryujinx.Tests.Cpu
 
             uint  w31 = TestContext.CurrentContext.Random.NextUInt();
             ulong z   = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, z);
 
             SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0);
 
@@ -2209,7 +2210,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
             ulong z   = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
+            V128 v0 = MakeVectorE1(z);
 
             SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0);
 
@@ -2226,7 +2227,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
             ulong z   = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0(z);
+            V128 v0 = MakeVectorE0(z);
 
             SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0);
 
@@ -2238,8 +2239,8 @@ namespace Ryujinx.Tests.Cpu
                               [ValueSource("_1S_F_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2251,8 +2252,8 @@ namespace Ryujinx.Tests.Cpu
                               [ValueSource("_1D_F_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2265,8 +2266,8 @@ namespace Ryujinx.Tests.Cpu
                                        [Values(RMode.Rn)] RMode rMode)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -2285,8 +2286,8 @@ namespace Ryujinx.Tests.Cpu
                                        [Values(RMode.Rn)] RMode rMode)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -2311,8 +2312,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -2335,8 +2336,8 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -2354,8 +2355,8 @@ namespace Ryujinx.Tests.Cpu
                                      [ValueSource("_1S_F_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2367,8 +2368,8 @@ namespace Ryujinx.Tests.Cpu
                                      [ValueSource("_1D_F_")] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2386,8 +2387,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2403,8 +2404,8 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2417,8 +2418,8 @@ namespace Ryujinx.Tests.Cpu
                                   [Values] RMode rMode)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             int fpcr = (int)rMode << (int)Fpcr.RMode;
 
@@ -2433,8 +2434,8 @@ namespace Ryujinx.Tests.Cpu
                                   [Values] RMode rMode)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
 
             int fpcr = (int)rMode << (int)Fpcr.RMode;
 
@@ -2455,8 +2456,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             int fpcr = (int)rMode << (int)Fpcr.RMode;
 
@@ -2475,8 +2476,8 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             int fpcr = (int)rMode << (int)Fpcr.RMode;
 
@@ -2494,8 +2495,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x7EE0B800; // NEG D0, D0
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2513,8 +2514,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2532,8 +2533,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2549,8 +2550,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x2E205800; // NOT V0.8B, V0.8B
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2566,8 +2567,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x6E205800; // NOT V0.16B, V0.16B
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2583,8 +2584,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x2E605800; // RBIT V0.8B, V0.8B
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2600,8 +2601,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x6E605800; // RBIT V0.16B, V0.16B
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2617,8 +2618,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x0E201800; // REV16 V0.8B, V0.8B
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2634,8 +2635,8 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x4E201800; // REV16 V0.16B, V0.16B
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2653,8 +2654,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2672,8 +2673,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2691,8 +2692,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2710,8 +2711,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2729,8 +2730,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2748,8 +2749,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2767,8 +2768,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2786,8 +2787,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2805,8 +2806,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2824,8 +2825,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2837,8 +2838,8 @@ namespace Ryujinx.Tests.Cpu
                                  [ValueSource("_1S_")] [Random(RndCnt)] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2850,8 +2851,8 @@ namespace Ryujinx.Tests.Cpu
                                  [ValueSource("_1D_")] [Random(RndCnt)] ulong a)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2869,8 +2870,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2886,8 +2887,8 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2903,8 +2904,8 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z0, z1);
-            Vector128<float> v1 = MakeVectorE0E1(a0, a1);
+            V128 v0 = MakeVectorE0E1(z0, z1);
+            V128 v1 = MakeVectorE0E1(a0, a1);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2920,8 +2921,8 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z0, z1);
-            Vector128<float> v1 = MakeVectorE0E1(a0, a1);
+            V128 v0 = MakeVectorE0E1(z0, z1);
+            V128 v1 = MakeVectorE0E1(a0, a1);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -2941,8 +2942,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((size & 3) << 22);
             opcode |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2960,8 +2961,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2979,8 +2980,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -2998,8 +2999,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3017,8 +3018,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3036,8 +3037,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3055,8 +3056,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3074,8 +3075,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3093,8 +3094,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3112,8 +3113,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3131,8 +3132,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3150,8 +3151,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3169,8 +3170,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3188,8 +3189,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3207,8 +3208,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3226,8 +3227,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3245,8 +3246,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3264,8 +3265,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3283,8 +3284,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3302,8 +3303,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3321,8 +3322,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3340,8 +3341,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3359,8 +3360,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3378,8 +3379,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3397,8 +3398,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3416,8 +3417,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3435,8 +3436,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -3454,8 +3455,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs
index 4702b986d..fd8ec9c57 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdCrypto.cs
@@ -1,11 +1,9 @@
 // https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
 
-using ChocolArm64.State;
+using ARMeilleure.State;
 
 using NUnit.Framework;
 
-using System.Runtime.Intrinsics;
-
 namespace Ryujinx.Tests.Cpu
 {
     public class CpuTestSimdCrypto : CpuTest
@@ -23,20 +21,20 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x4E285800; // AESD V0.16B, V0.16B
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(roundKeyL ^ valueL, roundKeyH ^ valueH);
-            Vector128<float> v1 = MakeVectorE0E1(roundKeyL,          roundKeyH);
+            V128 v0 = MakeVectorE0E1(roundKeyL ^ valueL, roundKeyH ^ valueH);
+            V128 v1 = MakeVectorE0E1(roundKeyL,          roundKeyH);
 
-            CpuThreadState threadState = SingleOpcode(opcode, v0: v0, v1: v1);
+            ExecutionContext context = SingleOpcode(opcode, v0: v0, v1: v1);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(threadState.V0), Is.EqualTo(resultL));
-                Assert.That(GetVectorE1(threadState.V0), Is.EqualTo(resultH));
+                Assert.That(GetVectorE0(context.GetV(0)), Is.EqualTo(resultL));
+                Assert.That(GetVectorE1(context.GetV(0)), Is.EqualTo(resultH));
             });
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(threadState.V1), Is.EqualTo(roundKeyL));
-                Assert.That(GetVectorE1(threadState.V1), Is.EqualTo(roundKeyH));
+                Assert.That(GetVectorE0(context.GetV(1)), Is.EqualTo(roundKeyL));
+                Assert.That(GetVectorE1(context.GetV(1)), Is.EqualTo(roundKeyH));
             });
 
             CompareAgainstUnicorn();
@@ -55,20 +53,20 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x4E284800; // AESE V0.16B, V0.16B
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(roundKeyL ^ valueL, roundKeyH ^ valueH);
-            Vector128<float> v1 = MakeVectorE0E1(roundKeyL,          roundKeyH);
+            V128 v0 = MakeVectorE0E1(roundKeyL ^ valueL, roundKeyH ^ valueH);
+            V128 v1 = MakeVectorE0E1(roundKeyL,          roundKeyH);
 
-            CpuThreadState threadState = SingleOpcode(opcode, v0: v0, v1: v1);
+            ExecutionContext context = SingleOpcode(opcode, v0: v0, v1: v1);
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(threadState.V0), Is.EqualTo(resultL));
-                Assert.That(GetVectorE1(threadState.V0), Is.EqualTo(resultH));
+                Assert.That(GetVectorE0(context.GetV(0)), Is.EqualTo(resultL));
+                Assert.That(GetVectorE1(context.GetV(0)), Is.EqualTo(resultH));
             });
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(threadState.V1), Is.EqualTo(roundKeyL));
-                Assert.That(GetVectorE1(threadState.V1), Is.EqualTo(roundKeyH));
+                Assert.That(GetVectorE0(context.GetV(1)), Is.EqualTo(roundKeyL));
+                Assert.That(GetVectorE1(context.GetV(1)), Is.EqualTo(roundKeyH));
             });
 
             CompareAgainstUnicorn();
@@ -85,24 +83,24 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x4E287800; // AESIMC V0.16B, V0.16B
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v = MakeVectorE0E1(valueL, valueH);
+            V128 v = MakeVectorE0E1(valueL, valueH);
 
-            CpuThreadState threadState = SingleOpcode(
+            ExecutionContext context = SingleOpcode(
                 opcode,
-                v0: rn == 0u ? v : default(Vector128<float>),
-                v1: rn == 1u ? v : default(Vector128<float>));
+                v0: rn == 0u ? v : default(V128),
+                v1: rn == 1u ? v : default(V128));
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(threadState.V0), Is.EqualTo(resultL));
-                Assert.That(GetVectorE1(threadState.V0), Is.EqualTo(resultH));
+                Assert.That(GetVectorE0(context.GetV(0)), Is.EqualTo(resultL));
+                Assert.That(GetVectorE1(context.GetV(0)), Is.EqualTo(resultH));
             });
             if (rn == 1u)
             {
                 Assert.Multiple(() =>
                 {
-                    Assert.That(GetVectorE0(threadState.V1), Is.EqualTo(valueL));
-                    Assert.That(GetVectorE1(threadState.V1), Is.EqualTo(valueH));
+                    Assert.That(GetVectorE0(context.GetV(1)), Is.EqualTo(valueL));
+                    Assert.That(GetVectorE1(context.GetV(1)), Is.EqualTo(valueH));
                 });
             }
 
@@ -120,24 +118,24 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x4E286800; // AESMC V0.16B, V0.16B
             opcode |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v = MakeVectorE0E1(valueL, valueH);
+            V128 v = MakeVectorE0E1(valueL, valueH);
 
-            CpuThreadState threadState = SingleOpcode(
+            ExecutionContext context = SingleOpcode(
                 opcode,
-                v0: rn == 0u ? v : default(Vector128<float>),
-                v1: rn == 1u ? v : default(Vector128<float>));
+                v0: rn == 0u ? v : default(V128),
+                v1: rn == 1u ? v : default(V128));
 
             Assert.Multiple(() =>
             {
-                Assert.That(GetVectorE0(threadState.V0), Is.EqualTo(resultL));
-                Assert.That(GetVectorE1(threadState.V0), Is.EqualTo(resultH));
+                Assert.That(GetVectorE0(context.GetV(0)), Is.EqualTo(resultL));
+                Assert.That(GetVectorE1(context.GetV(0)), Is.EqualTo(resultH));
             });
             if (rn == 1u)
             {
                 Assert.Multiple(() =>
                 {
-                    Assert.That(GetVectorE0(threadState.V1), Is.EqualTo(valueL));
-                    Assert.That(GetVectorE1(threadState.V1), Is.EqualTo(valueH));
+                    Assert.That(GetVectorE0(context.GetV(1)), Is.EqualTo(valueL));
+                    Assert.That(GetVectorE1(context.GetV(1)), Is.EqualTo(valueH));
                 });
             }
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs
index 8e2058553..17a2853f1 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs
@@ -1,10 +1,11 @@
 #define SimdCvt
 
+using ARMeilleure.State;
+
 using NUnit.Framework;
 
 using System;
 using System.Collections.Generic;
-using System.Runtime.Intrinsics;
 
 namespace Ryujinx.Tests.Cpu
 {
@@ -378,7 +379,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32;
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, x0: x0, x31: w31, v1: v1);
 
@@ -394,7 +395,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, x31: x31, v1: v1);
 
@@ -411,7 +412,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32;
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, x0: x0, x31: w31, v1: v1);
 
@@ -427,7 +428,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, x31: x31, v1: v1);
 
@@ -448,7 +449,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32;
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, x0: x0, x31: w31, v1: v1);
 
@@ -468,7 +469,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (scale << 10);
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, x31: x31, v1: v1);
 
@@ -489,7 +490,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32;
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, x0: x0, x31: w31, v1: v1);
 
@@ -509,7 +510,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (scale << 10);
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, x31: x31, v1: v1);
 
@@ -526,7 +527,7 @@ namespace Ryujinx.Tests.Cpu
 
             uint  w31 = TestContext.CurrentContext.Random.NextUInt();
             ulong z   = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, z);
 
             SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0);
 
@@ -543,7 +544,7 @@ namespace Ryujinx.Tests.Cpu
 
             uint  w31 = TestContext.CurrentContext.Random.NextUInt();
             ulong z   = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
+            V128 v0 = MakeVectorE1(z);
 
             SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0);
 
@@ -560,7 +561,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
             ulong z   = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, z);
 
             SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0);
 
@@ -577,7 +578,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
             ulong z   = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
+            V128 v0 = MakeVectorE1(z);
 
             SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0);
 
@@ -598,7 +599,7 @@ namespace Ryujinx.Tests.Cpu
 
             uint  w31 = TestContext.CurrentContext.Random.NextUInt();
             ulong z   = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, z);
 
             SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0);
 
@@ -619,7 +620,7 @@ namespace Ryujinx.Tests.Cpu
 
             uint  w31 = TestContext.CurrentContext.Random.NextUInt();
             ulong z   = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
+            V128 v0 = MakeVectorE1(z);
 
             SingleOpcode(opcodes, x1: wn, x31: w31, v0: v0);
 
@@ -640,7 +641,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
             ulong z   = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, z);
 
             SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0);
 
@@ -661,7 +662,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
             ulong z   = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
+            V128 v0 = MakeVectorE1(z);
 
             SingleOpcode(opcodes, x1: xn, x31: x31, v0: v0);
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs b/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs
index b8548169b..0ab40cad2 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdExt.cs
@@ -1,8 +1,8 @@
 #define SimdExt
 
-using NUnit.Framework;
+using ARMeilleure.State;
 
-using System.Runtime.Intrinsics;
+using NUnit.Framework;
 
 namespace Ryujinx.Tests.Cpu
 {
@@ -37,9 +37,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= (imm4 << 11);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -61,9 +61,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= (imm4 << 11);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs b/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs
index 4ccd43dbb..825a1c78c 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdFcond.cs
@@ -1,9 +1,10 @@
 #define SimdFcond
 
+using ARMeilleure.State;
+
 using NUnit.Framework;
 
 using System.Collections.Generic;
-using System.Runtime.Intrinsics;
 
 namespace Ryujinx.Tests.Cpu
 {
@@ -152,8 +153,8 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((cond & 15) << 12) | ((nzcv & 15) << 0);
 
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             bool v = TestContext.CurrentContext.Random.NextBool();
             bool c = TestContext.CurrentContext.Random.NextBool();
@@ -177,8 +178,8 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((cond & 15) << 12) | ((nzcv & 15) << 0);
 
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             bool v = TestContext.CurrentContext.Random.NextBool();
             bool c = TestContext.CurrentContext.Random.NextBool();
@@ -202,9 +203,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((cond & 15) << 12);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
@@ -223,9 +224,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((cond & 15) << 12);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdFmov.cs b/Ryujinx.Tests/Cpu/CpuTestSimdFmov.cs
index a7e0e0f96..534dba57d 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdFmov.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdFmov.cs
@@ -1,8 +1,8 @@
 #define SimdFmov
 
-using NUnit.Framework;
+using ARMeilleure.State;
 
-using System.Runtime.Intrinsics;
+using NUnit.Framework;
 
 namespace Ryujinx.Tests.Cpu
 {
@@ -36,7 +36,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((imm8 & 0xFFu) << 13);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, z);
 
             SingleOpcode(opcodes, v0: v0);
 
@@ -50,7 +50,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((imm8 & 0xFFu) << 13);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
+            V128 v0 = MakeVectorE1(z);
 
             SingleOpcode(opcodes, v0: v0);
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs b/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs
index ce8f63bc6..1ea74a112 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdImm.cs
@@ -1,9 +1,10 @@
 #define SimdImm
 
+using ARMeilleure.State;
+
 using NUnit.Framework;
 
 using System.Collections.Generic;
-using System.Runtime.Intrinsics;
 
 namespace Ryujinx.Tests.Cpu
 {
@@ -203,7 +204,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((amount & 1) << 13);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, z);
 
             SingleOpcode(opcodes, v0: v0);
 
@@ -224,7 +225,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((amount & 3) << 13);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, z);
 
             SingleOpcode(opcodes, v0: v0);
 
@@ -241,7 +242,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (abc << 16) | (defgh << 5);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
+            V128 v0 = MakeVectorE1(z);
 
             SingleOpcode(opcodes, v0: v0);
 
@@ -288,7 +289,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((q & 1) << 30);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(q == 0u ? z : 0ul);
+            V128 v0 = MakeVectorE1(q == 0u ? z : 0ul);
 
             SingleOpcode(opcodes, v0: v0);
 
@@ -309,7 +310,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((q & 1) << 30);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(q == 0u ? z : 0ul);
+            V128 v0 = MakeVectorE1(q == 0u ? z : 0ul);
 
             SingleOpcode(opcodes, v0: v0);
 
@@ -330,7 +331,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((q & 1) << 30);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(q == 0u ? z : 0ul);
+            V128 v0 = MakeVectorE1(q == 0u ? z : 0ul);
 
             SingleOpcode(opcodes, v0: v0);
 
@@ -351,7 +352,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((q & 1) << 30);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(q == 0u ? z : 0ul);
+            V128 v0 = MakeVectorE1(q == 0u ? z : 0ul);
 
             SingleOpcode(opcodes, v0: v0);
 
@@ -370,7 +371,7 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (abc << 16) | (defgh << 5);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
+            V128 v0 = MakeVectorE1(z);
 
             SingleOpcode(opcodes, v0: v0);
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs
index ea3727041..031ed0f2c 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs
@@ -1,8 +1,8 @@
 #define SimdIns
 
-using NUnit.Framework;
+using ARMeilleure.State;
 
-using System.Runtime.Intrinsics;
+using NUnit.Framework;
 
 namespace Ryujinx.Tests.Cpu
 {
@@ -86,7 +86,7 @@ namespace Ryujinx.Tests.Cpu
 
             uint  w31 = TestContext.CurrentContext.Random.NextUInt();
             ulong z   = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, z);
 
             SingleOpcode(opcode, x1: wn, x31: w31, v0: v0);
 
@@ -103,7 +103,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
             ulong z   = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, z);
 
             SingleOpcode(opcode, x1: xn, x31: x31, v0: v0);
 
@@ -122,8 +122,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -142,8 +142,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -162,8 +162,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -182,8 +182,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -207,8 +207,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
             opcode |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -232,8 +232,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
             opcode |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -257,8 +257,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
             opcode |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -282,8 +282,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
             opcode |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -306,7 +306,7 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
 
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, z);
 
             SingleOpcode(opcode, x1: wn, x31: w31, v0: v0);
 
@@ -329,7 +329,7 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
 
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, z);
 
             SingleOpcode(opcode, x1: wn, x31: w31, v0: v0);
 
@@ -352,7 +352,7 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
 
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, z);
 
             SingleOpcode(opcode, x1: wn, x31: w31, v0: v0);
 
@@ -375,7 +375,7 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
+            V128 v0 = MakeVectorE0E1(z, z);
 
             SingleOpcode(opcode, x1: xn, x31: x31, v0: v0);
 
@@ -400,8 +400,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
             opcode |= (imm4 << 11);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -426,8 +426,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
             opcode |= (imm4 << 11);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -452,8 +452,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
             opcode |= (imm4 << 11);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -478,8 +478,8 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
             opcode |= (imm4 << 11);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, v0: v0, v1: v1);
 
@@ -502,7 +502,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32;
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, x0: x0, x31: w31, v1: v1);
 
@@ -525,7 +525,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32;
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, x0: x0, x31: w31, v1: v1);
 
@@ -547,7 +547,7 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, x31: x31, v1: v1);
 
@@ -569,7 +569,7 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, x31: x31, v1: v1);
 
@@ -591,7 +591,7 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, x31: x31, v1: v1);
 
@@ -614,7 +614,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32;
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, x0: x0, x31: w31, v1: v1);
 
@@ -637,7 +637,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32;
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, x0: x0, x31: w31, v1: v1);
 
@@ -660,7 +660,7 @@ namespace Ryujinx.Tests.Cpu
 
             ulong x0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32;
             uint w31 = TestContext.CurrentContext.Random.NextUInt();
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, x0: x0, x31: w31, v1: v1);
 
@@ -682,7 +682,7 @@ namespace Ryujinx.Tests.Cpu
             opcode |= (imm5 << 16);
 
             ulong x31 = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcode, x31: x31, v1: v1);
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
index d9b828013..9b767db40 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
@@ -1,9 +1,10 @@
 #define SimdReg
 
+using ARMeilleure.State;
+
 using NUnit.Framework;
 
 using System.Collections.Generic;
-using System.Runtime.Intrinsics;
 
 namespace Ryujinx.Tests.Cpu
 {
@@ -570,9 +571,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x5EE08400; // ADD D0, D0, D0
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -592,9 +593,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -614,9 +615,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -636,9 +637,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -658,9 +659,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -680,9 +681,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -702,9 +703,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -722,9 +723,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x0E201C00; // AND V0.8B, V0.8B, V0.8B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -742,9 +743,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x4E201C00; // AND V0.16B, V0.16B, V0.16B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -762,9 +763,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x0E601C00; // BIC V0.8B, V0.8B, V0.8B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -782,9 +783,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x4E601C00; // BIC V0.16B, V0.16B, V0.16B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -802,9 +803,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x2EE01C00; // BIF V0.8B, V0.8B, V0.8B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -822,9 +823,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x6EE01C00; // BIF V0.16B, V0.16B, V0.16B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -842,9 +843,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x2EA01C00; // BIT V0.8B, V0.8B, V0.8B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -862,9 +863,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x6EA01C00; // BIT V0.16B, V0.16B, V0.16B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -882,9 +883,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x2E601C00; // BSL V0.8B, V0.8B, V0.8B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -902,9 +903,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x6E601C00; // BSL V0.16B, V0.16B, V0.16B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -922,9 +923,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x7EE08C00; // CMEQ D0, D0, D0
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -944,9 +945,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -966,9 +967,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -986,9 +987,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x5EE03C00; // CMGE D0, D0, D0
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1008,9 +1009,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1030,9 +1031,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1050,9 +1051,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x5EE03400; // CMGT D0, D0, D0
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1072,9 +1073,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1094,9 +1095,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1114,9 +1115,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x7EE03400; // CMHI D0, D0, D0
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1136,9 +1137,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1158,9 +1159,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1178,9 +1179,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x7EE03C00; // CMHS D0, D0, D0
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1200,9 +1201,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1222,9 +1223,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1242,9 +1243,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x5EE08C00; // CMTST D0, D0, D0
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1264,9 +1265,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1286,9 +1287,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1306,9 +1307,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x2E201C00; // EOR V0.8B, V0.8B, V0.8B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1326,9 +1327,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x6E201C00; // EOR V0.16B, V0.16B, V0.16B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1341,9 +1342,9 @@ namespace Ryujinx.Tests.Cpu
                                                         [ValueSource("_1S_F_")] ulong b)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1361,9 +1362,9 @@ namespace Ryujinx.Tests.Cpu
                                                         [ValueSource("_1D_F_")] ulong b)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1388,9 +1389,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
+            V128 v2 = MakeVectorE0E1(b, b * q);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1413,9 +1414,9 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1433,9 +1434,9 @@ namespace Ryujinx.Tests.Cpu
                                     [ValueSource("_1S_F_")] ulong b)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1452,9 +1453,9 @@ namespace Ryujinx.Tests.Cpu
                                     [ValueSource("_1D_F_")] ulong b)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1478,9 +1479,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
+            V128 v2 = MakeVectorE0E1(b, b * q);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1502,9 +1503,9 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1520,8 +1521,8 @@ namespace Ryujinx.Tests.Cpu
                                    [ValueSource("_1S_F_")] ulong a,
                                    [ValueSource("_1S_F_")] ulong b)
         {
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             bool v = TestContext.CurrentContext.Random.NextBool();
             bool c = TestContext.CurrentContext.Random.NextBool();
@@ -1538,8 +1539,8 @@ namespace Ryujinx.Tests.Cpu
                                    [ValueSource("_1D_F_")] ulong a,
                                    [ValueSource("_1D_F_")] ulong b)
         {
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             bool v = TestContext.CurrentContext.Random.NextBool();
             bool c = TestContext.CurrentContext.Random.NextBool();
@@ -1558,10 +1559,10 @@ namespace Ryujinx.Tests.Cpu
                                                 [ValueSource("_1S_F_")] ulong c)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
-            Vector128<float> v3 = MakeVectorE0(c);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
+            V128 v3 = MakeVectorE0(c);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1580,10 +1581,10 @@ namespace Ryujinx.Tests.Cpu
                                                 [ValueSource("_1D_F_")] ulong c)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
-            Vector128<float> v3 = MakeVectorE0(c);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
+            V128 v3 = MakeVectorE0(c);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1601,9 +1602,9 @@ namespace Ryujinx.Tests.Cpu
                                      [ValueSource("_1S_F_")] ulong b)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1621,9 +1622,9 @@ namespace Ryujinx.Tests.Cpu
                                      [ValueSource("_1D_F_")] ulong b)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1648,9 +1649,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
+            V128 v2 = MakeVectorE0E1(b, b * q);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1673,9 +1674,9 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1700,9 +1701,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
+            V128 v2 = MakeVectorE0E1(b, b * q);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1725,9 +1726,9 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1745,9 +1746,9 @@ namespace Ryujinx.Tests.Cpu
                                        [ValueSource("_1S_F_")] ulong b)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1765,9 +1766,9 @@ namespace Ryujinx.Tests.Cpu
                                        [ValueSource("_1D_F_")] ulong b)
         {
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1792,9 +1793,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
+            V128 v2 = MakeVectorE0E1(b, b * q);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1817,9 +1818,9 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -1844,9 +1845,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
@@ -1866,9 +1867,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
@@ -1886,9 +1887,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x0EE01C00; // ORN V0.8B, V0.8B, V0.8B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1906,9 +1907,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x4EE01C00; // ORN V0.16B, V0.16B, V0.16B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1926,9 +1927,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x0EA01C00; // ORR V0.8B, V0.8B, V0.8B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1946,9 +1947,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x4EA01C00; // ORR V0.16B, V0.16B, V0.16B
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1968,9 +1969,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -1990,9 +1991,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2012,9 +2013,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2034,9 +2035,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2056,9 +2057,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2078,9 +2079,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2100,9 +2101,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2122,9 +2123,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE1(a);
-            Vector128<float> v2 = MakeVectorE1(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE1(a);
+            V128 v2 = MakeVectorE1(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2144,9 +2145,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2166,9 +2167,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2188,9 +2189,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2210,9 +2211,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE1(a);
-            Vector128<float> v2 = MakeVectorE1(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE1(a);
+            V128 v2 = MakeVectorE1(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2232,9 +2233,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2254,9 +2255,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE1(a);
-            Vector128<float> v2 = MakeVectorE1(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE1(a);
+            V128 v2 = MakeVectorE1(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2276,9 +2277,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2298,9 +2299,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE1(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE1(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2318,9 +2319,9 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z0, z1);
-            Vector128<float> v1 = MakeVectorE0E1(a0, a1);
-            Vector128<float> v2 = MakeVectorE0E1(b0, b1);
+            V128 v0 = MakeVectorE0E1(z0, z1);
+            V128 v1 = MakeVectorE0E1(a0, a1);
+            V128 v2 = MakeVectorE0E1(b0, b1);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
@@ -2338,9 +2339,9 @@ namespace Ryujinx.Tests.Cpu
         {
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z0, z1);
-            Vector128<float> v1 = MakeVectorE0E1(a0, a1);
-            Vector128<float> v2 = MakeVectorE0E1(b0, b1);
+            V128 v0 = MakeVectorE0E1(z0, z1);
+            V128 v1 = MakeVectorE0E1(a0, a1);
+            V128 v2 = MakeVectorE0E1(b0, b1);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
@@ -2360,9 +2361,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2382,9 +2383,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2404,9 +2405,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2426,9 +2427,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2450,9 +2451,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((size & 3) << 22);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
+            V128 v2 = MakeVectorE0E1(b, b * q);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
@@ -2472,9 +2473,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
@@ -2494,9 +2495,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE1(a);
-            Vector128<float> v2 = MakeVectorE1(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE1(a);
+            V128 v2 = MakeVectorE1(b);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
@@ -2516,9 +2517,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2538,9 +2539,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2560,9 +2561,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2582,9 +2583,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2604,9 +2605,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2626,9 +2627,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2648,9 +2649,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2670,9 +2671,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2692,9 +2693,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2714,9 +2715,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2736,9 +2737,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2758,9 +2759,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2780,9 +2781,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2802,9 +2803,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2824,9 +2825,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
@@ -2846,9 +2847,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
@@ -2868,9 +2869,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2890,9 +2891,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE1(a);
-            Vector128<float> v2 = MakeVectorE1(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE1(a);
+            V128 v2 = MakeVectorE1(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2912,9 +2913,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2934,9 +2935,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE1(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE1(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2954,9 +2955,9 @@ namespace Ryujinx.Tests.Cpu
             uint opcode = 0x7EE08400; // SUB D0, D0, D0
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2976,9 +2977,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -2998,9 +2999,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3020,9 +3021,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3042,9 +3043,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3064,9 +3065,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3086,9 +3087,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, ~z);
-            Vector128<float> v1 = MakeVectorE0E1(a, ~a);
-            Vector128<float> v2 = MakeVectorE0E1(b, ~b);
+            V128 v0 = MakeVectorE0E1(z, ~z);
+            V128 v1 = MakeVectorE0E1(a, ~a);
+            V128 v2 = MakeVectorE0E1(b, ~b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3108,9 +3109,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3130,9 +3131,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, ~z);
-            Vector128<float> v1 = MakeVectorE0E1(a, ~a);
-            Vector128<float> v2 = MakeVectorE0E1(b, ~b);
+            V128 v0 = MakeVectorE0E1(z, ~z);
+            V128 v1 = MakeVectorE0E1(a, ~a);
+            V128 v2 = MakeVectorE0E1(b, ~b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3152,9 +3153,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3174,9 +3175,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3196,9 +3197,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3218,9 +3219,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE1(a);
-            Vector128<float> v2 = MakeVectorE1(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE1(a);
+            V128 v2 = MakeVectorE1(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3240,9 +3241,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3262,9 +3263,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3284,9 +3285,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3306,9 +3307,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE1(a);
-            Vector128<float> v2 = MakeVectorE1(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE1(a);
+            V128 v2 = MakeVectorE1(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3328,9 +3329,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3350,9 +3351,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE1(a);
-            Vector128<float> v2 = MakeVectorE1(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE1(a);
+            V128 v2 = MakeVectorE1(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3372,9 +3373,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3394,9 +3395,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE1(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE1(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3416,9 +3417,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3438,9 +3439,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3460,9 +3461,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3482,9 +3483,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3504,9 +3505,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3526,9 +3527,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3548,9 +3549,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3570,9 +3571,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3592,9 +3593,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3614,9 +3615,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3636,9 +3637,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3658,9 +3659,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3680,9 +3681,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3702,9 +3703,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE1(a);
-            Vector128<float> v2 = MakeVectorE1(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE1(a);
+            V128 v2 = MakeVectorE1(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3724,9 +3725,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3746,9 +3747,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE1(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE1(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3768,9 +3769,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3790,9 +3791,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, ~z);
-            Vector128<float> v1 = MakeVectorE0E1(a, ~a);
-            Vector128<float> v2 = MakeVectorE0E1(b, ~b);
+            V128 v0 = MakeVectorE0E1(z, ~z);
+            V128 v1 = MakeVectorE0E1(a, ~a);
+            V128 v2 = MakeVectorE0E1(b, ~b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3812,9 +3813,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3834,9 +3835,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, ~z);
-            Vector128<float> v1 = MakeVectorE0E1(a, ~a);
-            Vector128<float> v2 = MakeVectorE0E1(b, ~b);
+            V128 v0 = MakeVectorE0E1(z, ~z);
+            V128 v1 = MakeVectorE0E1(a, ~a);
+            V128 v2 = MakeVectorE0E1(b, ~b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3856,9 +3857,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3878,9 +3879,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, ~z);
-            Vector128<float> v1 = MakeVectorE0E1(a, ~a);
-            Vector128<float> v2 = MakeVectorE0E1(b, ~b);
+            V128 v0 = MakeVectorE0E1(z, ~z);
+            V128 v1 = MakeVectorE0E1(a, ~a);
+            V128 v2 = MakeVectorE0E1(b, ~b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3900,9 +3901,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0(b);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
@@ -3922,9 +3923,9 @@ namespace Ryujinx.Tests.Cpu
             opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcode |= ((size & 3) << 22);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, ~z);
-            Vector128<float> v1 = MakeVectorE0E1(a, ~a);
-            Vector128<float> v2 = MakeVectorE0E1(b, ~b);
+            V128 v0 = MakeVectorE0E1(z, ~z);
+            V128 v1 = MakeVectorE0E1(a, ~a);
+            V128 v2 = MakeVectorE0E1(b, ~b);
 
             SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs
index 64f9bc6cc..23e0e3646 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs
@@ -1,8 +1,8 @@
 #define SimdRegElem
 
-using NUnit.Framework;
+using ARMeilleure.State;
 
-using System.Runtime.Intrinsics;
+using NUnit.Framework;
 
 namespace Ryujinx.Tests.Cpu
 {
@@ -95,9 +95,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (l << 21) | (m << 20) | (h << 11);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * h);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
+            V128 v2 = MakeVectorE0E1(b, b * h);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
@@ -122,9 +122,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (l << 21) | (h << 11);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * h);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
+            V128 v2 = MakeVectorE0E1(b, b * h);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
@@ -150,9 +150,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (l << 21) | (m << 20) | (h << 11);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * h);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
+            V128 v2 = MakeVectorE0E1(b, b * h);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
@@ -177,9 +177,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (l << 21) | (h << 11);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * h);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
+            V128 v2 = MakeVectorE0E1(b, b * h);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs b/Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs
index 51027195b..38197fd5f 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs
@@ -1,9 +1,10 @@
 #define SimdRegElemF
 
+using ARMeilleure.State;
+
 using NUnit.Framework;
 
 using System.Collections.Generic;
-using System.Runtime.Intrinsics;
 
 namespace Ryujinx.Tests.Cpu
 {
@@ -230,9 +231,9 @@ namespace Ryujinx.Tests.Cpu
 
             opcodes |= (l << 21) | (h << 11);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * h);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0E1(b, b * h);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -255,9 +256,9 @@ namespace Ryujinx.Tests.Cpu
 
             opcodes |= h << 11;
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * h);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0E1(b, b * h);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -287,9 +288,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (l << 21) | (h << 11);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * h);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
+            V128 v2 = MakeVectorE0E1(b, b * h);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -316,9 +317,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= h << 11;
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * h);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b * h);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -342,9 +343,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (l << 21) | (h << 11);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * h);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0E1(b, b * h);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -367,9 +368,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= h << 11;
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE1(z);
-            Vector128<float> v1 = MakeVectorE0(a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * h);
+            V128 v0 = MakeVectorE1(z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0E1(b, b * h);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -399,9 +400,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (l << 21) | (h << 11);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * h);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
+            V128 v2 = MakeVectorE0E1(b, b * h);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
@@ -428,9 +429,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= h << 11;
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
-            Vector128<float> v2 = MakeVectorE0E1(b, b * h);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
+            V128 v2 = MakeVectorE0E1(b, b * h);
 
             int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs
index 54ed044d9..fbbc9f9fb 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs
@@ -1,10 +1,11 @@
 #define SimdShImm
 
+using ARMeilleure.State;
+
 using NUnit.Framework;
 
 using System;
 using System.Collections.Generic;
-using System.Runtime.Intrinsics;
 
 namespace Ryujinx.Tests.Cpu
 {
@@ -488,8 +489,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -509,8 +510,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= (immHb << 16);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -532,8 +533,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -553,8 +554,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= (immHb << 16);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -574,8 +575,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= (immHb << 16);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -597,8 +598,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -620,8 +621,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -643,8 +644,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -664,8 +665,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= (immHb << 16);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -687,8 +688,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -710,8 +711,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -733,8 +734,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -754,8 +755,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= (immHb << 16);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -777,8 +778,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -800,8 +801,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -823,8 +824,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a * q);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a * q);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -844,8 +845,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= (immHb << 16);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -867,8 +868,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -890,8 +891,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -913,8 +914,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(a, a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -934,8 +935,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= (immHb << 16);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -955,8 +956,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= (immHb << 16);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -976,8 +977,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
             opcodes |= (immHb << 16);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -999,8 +1000,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -1022,8 +1023,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
@@ -1045,8 +1046,8 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= (immHb << 16);
             opcodes |= ((q & 1) << 30);
 
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0(a);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
 
             SingleOpcode(opcodes, v0: v0, v1: v1);
 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdTbl.cs b/Ryujinx.Tests/Cpu/CpuTestSimdTbl.cs
index 69195af20..5e6546aba 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdTbl.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdTbl.cs
@@ -1,9 +1,10 @@
 #define SimdTbl
 
+using ARMeilleure.State;
+
 using NUnit.Framework;
 
 using System.Collections.Generic;
-using System.Runtime.Intrinsics;
 
 namespace Ryujinx.Tests.Cpu
 {
@@ -146,9 +147,9 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((q & 1) << 30);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(table0, table0);
-            Vector128<float> v2 = MakeVectorE0E1(indexes, q == 1u ? indexes : 0ul);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(table0, table0);
+            V128 v2 = MakeVectorE0E1(indexes, q == 1u ? indexes : 0ul);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
 
@@ -169,10 +170,10 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((q & 1) << 30);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(table0, table0);
-            Vector128<float> v2 = MakeVectorE0E1(table1, table1);
-            Vector128<float> v3 = MakeVectorE0E1(indexes, q == 1u ? indexes : 0ul);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(table0, table0);
+            V128 v2 = MakeVectorE0E1(table1, table1);
+            V128 v3 = MakeVectorE0E1(indexes, q == 1u ? indexes : 0ul);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2, v3: v3);
 
@@ -193,10 +194,10 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((q & 1) << 30);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v30 = MakeVectorE0E1(z, z);
-            Vector128<float> v31 = MakeVectorE0E1(table0, table0);
-            Vector128<float> v0  = MakeVectorE0E1(table1, table1);
-            Vector128<float> v1  = MakeVectorE0E1(indexes, indexes);
+            V128 v30 = MakeVectorE0E1(z, z);
+            V128 v31 = MakeVectorE0E1(table0, table0);
+            V128 v0  = MakeVectorE0E1(table1, table1);
+            V128 v1  = MakeVectorE0E1(indexes, indexes);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v30: v30, v31: v31);
 
@@ -218,11 +219,11 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((q & 1) << 30);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(table0, table0);
-            Vector128<float> v2 = MakeVectorE0E1(table1, table1);
-            Vector128<float> v3 = MakeVectorE0E1(table2, table2);
-            Vector128<float> v4 = MakeVectorE0E1(indexes, q == 1u ? indexes : 0ul);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(table0, table0);
+            V128 v2 = MakeVectorE0E1(table1, table1);
+            V128 v3 = MakeVectorE0E1(table2, table2);
+            V128 v4 = MakeVectorE0E1(indexes, q == 1u ? indexes : 0ul);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2, v3: v3, v4: v4);
 
@@ -244,11 +245,11 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((q & 1) << 30);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v30 = MakeVectorE0E1(z, z);
-            Vector128<float> v31 = MakeVectorE0E1(table0, table0);
-            Vector128<float> v0  = MakeVectorE0E1(table1, table1);
-            Vector128<float> v1  = MakeVectorE0E1(table2, table2);
-            Vector128<float> v2  = MakeVectorE0E1(indexes, indexes);
+            V128 v30 = MakeVectorE0E1(z, z);
+            V128 v31 = MakeVectorE0E1(table0, table0);
+            V128 v0  = MakeVectorE0E1(table1, table1);
+            V128 v1  = MakeVectorE0E1(table2, table2);
+            V128 v2  = MakeVectorE0E1(indexes, indexes);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2, v30: v30, v31: v31);
 
@@ -271,12 +272,12 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((q & 1) << 30);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v0 = MakeVectorE0E1(z, z);
-            Vector128<float> v1 = MakeVectorE0E1(table0, table0);
-            Vector128<float> v2 = MakeVectorE0E1(table1, table1);
-            Vector128<float> v3 = MakeVectorE0E1(table2, table2);
-            Vector128<float> v4 = MakeVectorE0E1(table3, table3);
-            Vector128<float> v5 = MakeVectorE0E1(indexes, q == 1u ? indexes : 0ul);
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(table0, table0);
+            V128 v2 = MakeVectorE0E1(table1, table1);
+            V128 v3 = MakeVectorE0E1(table2, table2);
+            V128 v4 = MakeVectorE0E1(table3, table3);
+            V128 v5 = MakeVectorE0E1(indexes, q == 1u ? indexes : 0ul);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2, v3: v3, v4: v4, v5: v5);
 
@@ -299,12 +300,12 @@ namespace Ryujinx.Tests.Cpu
             opcodes |= ((q & 1) << 30);
 
             ulong z = TestContext.CurrentContext.Random.NextULong();
-            Vector128<float> v30 = MakeVectorE0E1(z, z);
-            Vector128<float> v31 = MakeVectorE0E1(table0, table0);
-            Vector128<float> v0  = MakeVectorE0E1(table1, table1);
-            Vector128<float> v1  = MakeVectorE0E1(table2, table2);
-            Vector128<float> v2  = MakeVectorE0E1(table3, table3);
-            Vector128<float> v3  = MakeVectorE0E1(indexes, indexes);
+            V128 v30 = MakeVectorE0E1(z, z);
+            V128 v31 = MakeVectorE0E1(table0, table0);
+            V128 v0  = MakeVectorE0E1(table1, table1);
+            V128 v1  = MakeVectorE0E1(table2, table2);
+            V128 v2  = MakeVectorE0E1(table3, table3);
+            V128 v3  = MakeVectorE0E1(indexes, indexes);
 
             SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2, v3: v3, v30: v30, v31: v31);
 
diff --git a/Ryujinx.Tests/Ryujinx.Tests.csproj b/Ryujinx.Tests/Ryujinx.Tests.csproj
index fd305a4ad..0ebc81960 100644
--- a/Ryujinx.Tests/Ryujinx.Tests.csproj
+++ b/Ryujinx.Tests/Ryujinx.Tests.csproj
@@ -2,7 +2,7 @@
 
   <PropertyGroup>
     <TargetFramework>netcoreapp2.1</TargetFramework>
-    <RuntimeIdentifiers>win10-x64;osx-x64;linux-x64</RuntimeIdentifiers>
+    <RuntimeIdentifiers>win-x64;osx-x64;linux-x64</RuntimeIdentifiers>
     <OutputType>Exe</OutputType>
     <IsPackable>false</IsPackable>
 
@@ -30,12 +30,11 @@
     <PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.2.0" />
     <PackageReference Include="NUnit" Version="3.12.0" />
     <PackageReference Include="NUnit3TestAdapter" Version="3.13.0" />
-    <PackageReference Include="System.Runtime.Intrinsics.Experimental" Version="4.5.0-rc1" />
   </ItemGroup>
 
   <ItemGroup>
-    <ProjectReference Include="..\ChocolArm64\ChocolArm64.csproj" />
     <ProjectReference Include="..\Ryujinx.Tests.Unicorn\Ryujinx.Tests.Unicorn.csproj" />
+    <ProjectReference Include="..\ARMeilleure\ARMeilleure.csproj" />
   </ItemGroup>
 
   <Target Name="CopyUnicorn" AfterTargets="Build">
diff --git a/Ryujinx.sln b/Ryujinx.sln
index b928a06d6..8177f8617 100644
--- a/Ryujinx.sln
+++ b/Ryujinx.sln
@@ -28,7 +28,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Common", "Ryujinx.C
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Profiler", "Ryujinx.Profiler\Ryujinx.Profiler.csproj", "{4E69B67F-8CA7-42CF-A9E1-CCB0915DFB34}"
 EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tests", "Tests", "{464D8AB7-B056-4A99-B207-B8DCFB47AAA9}"
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ARMeilleure", "ARMeilleure\ARMeilleure.csproj", "{ABF09A5E-2D8B-4B6F-A51D-5CE414DDB15A}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -130,10 +130,6 @@ Global
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
 	EndGlobalSection
-	GlobalSection(NestedProjects) = preSolution
-		{EBB55AEA-C7D7-4DEB-BF96-FA1789E225E9} = {464D8AB7-B056-4A99-B207-B8DCFB47AAA9}
-		{D8F72938-78EF-4E8C-BAFE-531C9C3C8F15} = {464D8AB7-B056-4A99-B207-B8DCFB47AAA9}
-	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 		SolutionGuid = {110169B3-3328-4730-8AB0-BA05BEF75C1A}
 	EndGlobalSection
diff --git a/Ryujinx/Config.jsonc b/Ryujinx/Config.jsonc
index 454c52ae8..2acb7f38d 100644
--- a/Ryujinx/Config.jsonc
+++ b/Ryujinx/Config.jsonc
@@ -21,7 +21,7 @@
 
     // Enable printing guest logs
     "logging_enable_guest": true,
-    
+
     // Enable printing FS access logs. fs_global_access_log_mode must be 2 or 3
     "logging_enable_fs_access_log": false,
 
@@ -53,8 +53,8 @@
     // Sets the "GlobalAccessLogMode". Possible modes are 0-3
     "fs_global_access_log_mode": 0,
 
-    // Enable or disable aggressive CPU optimizations
-    "enable_aggressive_cpu_opts": true,
+    // Use the legacy ChocolArm64 JIT instead of the new ARMeilleure JIT
+    "enable_legacy_jit": false,
 
     // Enable or disable ignoring missing services, this may cause instability
     "ignore_missing_services": false,
diff --git a/Ryujinx/Configuration.cs b/Ryujinx/Configuration.cs
index c15fff2aa..7c9182052 100644
--- a/Ryujinx/Configuration.cs
+++ b/Ryujinx/Configuration.cs
@@ -1,3 +1,4 @@
+using ARMeilleure;
 using LibHac.Fs;
 using OpenTK.Input;
 using Ryujinx.Common;
@@ -108,9 +109,9 @@ namespace Ryujinx
         public int FsGlobalAccessLogMode { get; private set; }
 
         /// <summary>
-        /// Enable or Disable aggressive CPU optimizations
+        /// Use the legacy ChocolArm64 JIT instead of the new ARMeilleure JIT
         /// </summary>
-        public bool EnableAggressiveCpuOpts { get; private set; }
+        public bool EnableLegacyJit { get; private set; }
 
         /// <summary>
         /// Enable or disable ignoring missing services
@@ -239,10 +240,7 @@ namespace Ryujinx
 
             device.System.GlobalAccessLogMode = Instance.FsGlobalAccessLogMode;
 
-            if (Instance.EnableAggressiveCpuOpts)
-            {
-                Optimizations.AssumeStrictAbiCompliance = true;
-            }
+            device.System.UseLegacyJit = Instance.EnableLegacyJit;
 
             ServiceConfiguration.IgnoreMissingServices = Instance.IgnoreMissingServices;
 
diff --git a/Ryujinx/Ryujinx.csproj b/Ryujinx/Ryujinx.csproj
index 4ff06fa07..80b03f46b 100644
--- a/Ryujinx/Ryujinx.csproj
+++ b/Ryujinx/Ryujinx.csproj
@@ -2,7 +2,7 @@
 
   <PropertyGroup>
     <TargetFramework>netcoreapp2.1</TargetFramework>
-    <RuntimeIdentifiers>win10-x64;osx-x64;linux-x64</RuntimeIdentifiers>
+    <RuntimeIdentifiers>win-x64;osx-x64;linux-x64</RuntimeIdentifiers>
     <OutputType>Exe</OutputType>
     <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <Configurations>Debug;Release;Profile Debug;Profile Release</Configurations>
@@ -24,12 +24,12 @@
   </ItemGroup>
 
   <ItemGroup>
-    <ProjectReference Include="..\ChocolArm64\ChocolArm64.csproj" />
     <ProjectReference Include="..\Ryujinx.Audio\Ryujinx.Audio.csproj" />
     <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
     <ProjectReference Include="..\Ryujinx.Graphics\Ryujinx.Graphics.csproj" />
     <ProjectReference Include="..\Ryujinx.HLE\Ryujinx.HLE.csproj" />
     <ProjectReference Include="..\Ryujinx.Profiler\Ryujinx.Profiler.csproj" />
+    <ProjectReference Include="..\ARMeilleure\ARMeilleure.csproj" />
   </ItemGroup>
 
   <ItemGroup>
diff --git a/Ryujinx/_schema.json b/Ryujinx/_schema.json
index cdaf52682..c1a64c674 100644
--- a/Ryujinx/_schema.json
+++ b/Ryujinx/_schema.json
@@ -20,7 +20,7 @@
     "enable_multicore_scheduling",
     "enable_fs_integrity_checks",
     "fs_global_access_log_mode",
-    "enable_aggressive_cpu_opts",
+    "enable_legacy_jit",
     "controller_type",
     "enable_keyboard",
     "keyboard_controls",
@@ -462,7 +462,7 @@
       "$id": "#/properties/fs_global_access_log_mode",
       "type": "integer",
       "title": "Enable FS access log",
-      "description": "Enables FS access log output. Possible modes are 0-3. Modes 2 and 3 output to the console.",
+      "description": "Enables FS access log output. Possible modes are 0-3. Modes 2 and 3 output to the console",
       "default": 0,
       "minimum": 0,
       "examples": [
@@ -472,12 +472,12 @@
         3
       ]
     },
-    "enable_aggressive_cpu_opts": {
-      "$id": "#/properties/enable_aggressive_cpu_opts",
+    "enable_legacy_jit": {
+      "$id": "#/properties/enable_legacy_jit",
       "type": "boolean",
-      "title": "Enable Aggressive CPU Optimizations",
-      "description": "Enable or disable aggressive CPU optimizations",
-      "default": true,
+      "title": "Enable legacy JIT",
+      "description": "Use the legacy ChocolArm64 JIT instead of the new ARMeilleure JIT",
+      "default": false,
       "examples": [
         true,
         false