diff --git a/Ryujinx/Cpu/AOpCodeTable.cs b/Ryujinx/Cpu/AOpCodeTable.cs
index 267b0d80b..ffdf30a95 100644
--- a/Ryujinx/Cpu/AOpCodeTable.cs
+++ b/Ryujinx/Cpu/AOpCodeTable.cs
@@ -168,7 +168,7 @@ namespace ChocolArm64
             Set("00011110xx1xxxxx010110xxxxxxxxxx", AInstEmit.Fmin_S,        typeof(AOpCodeSimdReg));
             Set("00011110xx1xxxxx011110xxxxxxxxxx", AInstEmit.Fminnm_S,      typeof(AOpCodeSimdReg));
             Set("0x0011100x1xxxxx110011xxxxxxxxxx", AInstEmit.Fmla_V,        typeof(AOpCodeSimdReg));
-            Set("0x0011111<<xxxxx0001x0xxxxxxxxxx", AInstEmit.Fmla_Vs,       typeof(AOpCodeSimdRegElem));
+            Set("0x0011111<<xxxxx0001x0xxxxxxxxxx", AInstEmit.Fmla_Ve,       typeof(AOpCodeSimdRegElem));
             Set("00011110xx100000010000xxxxxxxxxx", AInstEmit.Fmov_S,        typeof(AOpCodeSimd));
             Set("00011110xx1xxxxxxxx100xxxxxxxxxx", AInstEmit.Fmov_Si,       typeof(AOpCodeSimdFmov));
             Set("0xx0111100000xxx111101xxxxxxxxxx", AInstEmit.Fmov_V,        typeof(AOpCodeSimdImm));
@@ -179,7 +179,7 @@ namespace ChocolArm64
             Set("00011111xx0xxxxx1xxxxxxxxxxxxxxx", AInstEmit.Fmsub_S,       typeof(AOpCodeSimdReg));
             Set("00011110xx1xxxxx000010xxxxxxxxxx", AInstEmit.Fmul_S,        typeof(AOpCodeSimdReg));
             Set("0x1011100x1xxxxx110111xxxxxxxxxx", AInstEmit.Fmul_V,        typeof(AOpCodeSimdReg));
-            Set("0x0011111<<xxxxx1001x0xxxxxxxxxx", AInstEmit.Fmul_Vs,       typeof(AOpCodeSimdRegElem));
+            Set("0x0011111<<xxxxx1001x0xxxxxxxxxx", AInstEmit.Fmul_Ve,       typeof(AOpCodeSimdRegElem));
             Set("00011110xx100001010000xxxxxxxxxx", AInstEmit.Fneg_S,        typeof(AOpCodeSimdReg));
             Set("00011110xx1xxxxx100010xxxxxxxxxx", AInstEmit.Fnmul_S,       typeof(AOpCodeSimdReg));
             Set("00011110xx100110010000xxxxxxxxxx", AInstEmit.Frinta_S,      typeof(AOpCodeSimd));
@@ -243,13 +243,15 @@ namespace ChocolArm64
             Set("01101110<<110000001110xxxxxxxxxx", AInstEmit.Uaddlv_V,      typeof(AOpCodeSimd));
             Set("0x101110<<1xxxxx000100xxxxxxxxxx", AInstEmit.Uaddw_V,       typeof(AOpCodeSimdReg));
             Set("x0011110xx100011000000xxxxxxxxxx", AInstEmit.Ucvtf_Gp,      typeof(AOpCodeSimdCvt));
-            Set("011111100x100001110110xxxxxxxxxx", AInstEmit.Ucvtf_V,       typeof(AOpCodeSimdReg));
+            Set("011111100x100001110110xxxxxxxxxx", AInstEmit.Ucvtf_S,       typeof(AOpCodeSimd));
+            Set("0x1011100x100001110110xxxxxxxxxx", AInstEmit.Ucvtf_V,       typeof(AOpCodeSimd));
             Set("0x001110000xxxxx001111xxxxxxxxxx", AInstEmit.Umov_S,        typeof(AOpCodeSimdIns));
             Set("0>101110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Ushl_V,        typeof(AOpCodeSimdReg));
             Set("0x10111100>>>xxx101001xxxxxxxxxx", AInstEmit.Ushll_V,       typeof(AOpCodeSimdShImm));
             Set("0x1011110>>>>xxx000001xxxxxxxxxx", AInstEmit.Ushr_V,        typeof(AOpCodeSimdShImm));
             Set("0x1011110>>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V,        typeof(AOpCodeSimdShImm));
             Set("0x001110xx0xxxxx000110xxxxxxxxxx", AInstEmit.Uzp1_V,        typeof(AOpCodeSimdReg));
+            Set("0x001110xx0xxxxx010110xxxxxxxxxx", AInstEmit.Uzp2_V,        typeof(AOpCodeSimdReg));
             Set("0x001110<<100001001010xxxxxxxxxx", AInstEmit.Xtn_V,         typeof(AOpCodeSimd));
 #endregion
         }
@@ -317,7 +319,7 @@ namespace ChocolArm64
             }
             else if (ZCount != 0 && OCount != 0)
             {
-                //When both the > and the <, then a value is blacklisted,
+                //When both the > and the < are used, then a value is blacklisted,
                 //with > indicating 0, and < indicating 1. So, for example, ><<
                 //blacklists the pattern 011, but 000, 001, 010, 100, 101,
                 //110 and 111 are valid.
diff --git a/Ryujinx/Cpu/Decoder/AOpCodeMem.cs b/Ryujinx/Cpu/Decoder/AOpCodeMem.cs
index 5ec72125a..be5367cf6 100644
--- a/Ryujinx/Cpu/Decoder/AOpCodeMem.cs
+++ b/Ryujinx/Cpu/Decoder/AOpCodeMem.cs
@@ -4,10 +4,10 @@ namespace ChocolArm64.Decoder
 {
     class AOpCodeMem : AOpCode
     {
-        public    int  Rt       { get; protected set; }
-        public    int  Rn       { get; protected set; }
-        public    int  Size     { get; protected set; }
-        public    bool Extend64 { get; protected set; }
+        public int  Rt       { get; protected set; }
+        public int  Rn       { get; protected set; }
+        public int  Size     { get; protected set; }
+        public bool Extend64 { get; protected set; }
 
         public AOpCodeMem(AInst Inst, long Position, int OpCode) : base(Inst, Position, OpCode)
         {
diff --git a/Ryujinx/Cpu/Decoder/AOpCodeSimdMemMs.cs b/Ryujinx/Cpu/Decoder/AOpCodeSimdMemMs.cs
index 635ec91e4..7c285b28d 100644
--- a/Ryujinx/Cpu/Decoder/AOpCodeSimdMemMs.cs
+++ b/Ryujinx/Cpu/Decoder/AOpCodeSimdMemMs.cs
@@ -3,12 +3,8 @@ using ChocolArm64.State;
 
 namespace ChocolArm64.Decoder
 {
-    class AOpCodeSimdMemMs : AOpCode, IAOpCodeSimd
+    class AOpCodeSimdMemMs : AOpCodeMemReg, IAOpCodeSimd
     {
-        public int  Rt     { get; private set; }
-        public int  Rn     { get; private set; }
-        public int  Size   { get; private set; }
-        public int  Rm     { get; private set; }
         public int  Reps   { get; private set; }
         public int  SElems { get; private set; }
         public int  Elems  { get; private set; }
@@ -29,10 +25,7 @@ namespace ChocolArm64.Decoder
                 default: Inst = AInst.Undefined; return;
             }
 
-            Rt    =  (OpCode >>  0) & 0x1f;
-            Rn    =  (OpCode >>  5) & 0x1f;
             Size  =  (OpCode >> 10) & 0x3;
-            Rm    =  (OpCode >> 16) & 0x1f;
             WBack = ((OpCode >> 23) & 0x1) != 0;
 
             bool Q = ((OpCode >> 30) & 1) != 0;
diff --git a/Ryujinx/Cpu/Decoder/AOpCodeSimdMemSs.cs b/Ryujinx/Cpu/Decoder/AOpCodeSimdMemSs.cs
index 5bad95edd..5782d54bd 100644
--- a/Ryujinx/Cpu/Decoder/AOpCodeSimdMemSs.cs
+++ b/Ryujinx/Cpu/Decoder/AOpCodeSimdMemSs.cs
@@ -3,12 +3,8 @@ using ChocolArm64.State;
 
 namespace ChocolArm64.Decoder
 {
-    class AOpCodeSimdMemSs : AOpCode, IAOpCodeSimd
+    class AOpCodeSimdMemSs : AOpCodeMemReg, IAOpCodeSimd
     {
-        public int  Rt        { get; private set; }
-        public int  Rn        { get; private set; }
-        public int  Size      { get; private set; }
-        public int  Rm        { get; private set; }
         public int  SElems    { get; private set; }
         public int  Index     { get; private set; }
         public bool Replicate { get; private set; }
@@ -91,9 +87,6 @@ namespace ChocolArm64.Decoder
             this.SElems = SElems;
             this.Size   = Scale;
 
-            Rt    =  (OpCode >>  0) & 0x1f;
-            Rn    =  (OpCode >>  5) & 0x1f;
-            Rm    =  (OpCode >> 16) & 0x1f;
             WBack = ((OpCode >> 23) & 0x1) != 0;
 
             RegisterSize = Q != 0
diff --git a/Ryujinx/Cpu/Decoder/AOpCodeSimdRegElem.cs b/Ryujinx/Cpu/Decoder/AOpCodeSimdRegElem.cs
index 828fe7884..d878b12ea 100644
--- a/Ryujinx/Cpu/Decoder/AOpCodeSimdRegElem.cs
+++ b/Ryujinx/Cpu/Decoder/AOpCodeSimdRegElem.cs
@@ -2,17 +2,13 @@ using ChocolArm64.Instruction;
 
 namespace ChocolArm64.Decoder
 {
-    class AOpCodeSimdRegElem : AOpCodeSimd
+    class AOpCodeSimdRegElem : AOpCodeSimdReg
     {
-        public int Rm    { get; private set; }
         public int Index { get; private set; }
 
         public AOpCodeSimdRegElem(AInst Inst, long Position, int OpCode) : base(Inst, Position, OpCode)
         {
-            Rm   = (OpCode >> 16) & 0x1f;
-            Size = (OpCode >> 22) & 0x1;
-
-            if (Size != 0)
+            if ((Size & 1) != 0)
             {
                 Index = (OpCode >> 11) & 1;
             }
diff --git a/Ryujinx/Cpu/Instruction/AInstEmitAluHelper.cs b/Ryujinx/Cpu/Instruction/AInstEmitAluHelper.cs
index b526c553f..5dc3babb9 100644
--- a/Ryujinx/Cpu/Instruction/AInstEmitAluHelper.cs
+++ b/Ryujinx/Cpu/Instruction/AInstEmitAluHelper.cs
@@ -21,13 +21,22 @@ namespace ChocolArm64.Instruction
 
         public static void EmitAddsVCheck(AILEmitterCtx Context)
         {
-            //V = (Rd ^ Rn) & ~(Rn ^ Rm) < 0
-            Context.Emit(OpCodes.Dup);
+            //V = (Rd ^ Rn) & (Rd ^ Rm) & ~(Rn ^ Rm) < 0
+            Context.EmitSttmp();
+            Context.EmitLdtmp();
+            Context.EmitLdtmp();
 
             EmitDataLoadRn(Context);
 
             Context.Emit(OpCodes.Xor);
 
+            Context.EmitLdtmp();
+
+            EmitDataLoadOper2(Context);
+
+            Context.Emit(OpCodes.Xor);
+            Context.Emit(OpCodes.And);
+
             EmitDataLoadOpers(Context);
 
             Context.Emit(OpCodes.Xor);
@@ -47,7 +56,10 @@ namespace ChocolArm64.Instruction
             EmitDataLoadOpers(Context);
 
             Context.Emit(OpCodes.Clt_Un);
-            Context.Emit(OpCodes.Not);
+
+            Context.EmitLdc_I4(1);
+
+            Context.Emit(OpCodes.Xor);
 
             Context.EmitStflg((int)APState.CBit);
         }
diff --git a/Ryujinx/Cpu/Instruction/AInstEmitScalar.cs b/Ryujinx/Cpu/Instruction/AInstEmitScalar.cs
index ab4c2fd22..cb97d40f8 100644
--- a/Ryujinx/Cpu/Instruction/AInstEmitScalar.cs
+++ b/Ryujinx/Cpu/Instruction/AInstEmitScalar.cs
@@ -507,6 +507,32 @@ namespace ChocolArm64.Instruction
             Context.EmitStvecsf(Op.Rd);
         }
 
+        public static void Ucvtf_S(AILEmitterCtx Context)
+        {
+            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+
+            Context.EmitLdvecsi(Op.Rn);
+
+            Context.Emit(OpCodes.Conv_R_Un);
+
+            EmitFloatCast(Context, Op.Size);
+
+            Context.EmitStvecsf(Op.Rd);
+        }
+
+        public static void Umov_S(AILEmitterCtx Context)
+        {
+            AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;
+
+            Context.EmitLdvec(Op.Rn);
+            Context.EmitLdc_I4(Op.DstIndex);
+            Context.EmitLdc_I4(Op.Size);
+
+            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.ExtractVec));
+
+            Context.EmitStintzr(Op.Rd);
+        }
+
         private static void EmitScalarOp(AILEmitterCtx Context, OpCode ILOp)
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
diff --git a/Ryujinx/Cpu/Instruction/AInstEmitSimd.cs b/Ryujinx/Cpu/Instruction/AInstEmitSimd.cs
index 08fd5807e..f4f9aa1a9 100644
--- a/Ryujinx/Cpu/Instruction/AInstEmitSimd.cs
+++ b/Ryujinx/Cpu/Instruction/AInstEmitSimd.cs
@@ -11,133 +11,240 @@ namespace ChocolArm64.Instruction
 {
     static partial class AInstEmit
     {
-        public static void Add_V(AILEmitterCtx Context) => EmitVectorBinaryZx(Context, OpCodes.Add);
+        public static void Add_V(AILEmitterCtx Context)
+        {
+            EmitVectorBinaryZx(Context, () => Context.Emit(OpCodes.Add));
+        }
 
         public static void Addp_V(AILEmitterCtx Context)
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
-            Context.EmitLdvec(Op.Rn);
-            Context.EmitLdvec(Op.Rm);
-            Context.EmitLdc_I4(Op.Size);
+            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
 
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Addp64),
-                nameof(ASoftFallback.Addp128));
+            int Elems = Bytes >> Op.Size;
+            int Half  = Elems >> 1;
 
-            Context.EmitStvec(Op.Rd);
+            for (int Index = 0; Index < Elems; Index++)
+            {
+                int Elem = (Index & (Half - 1)) << 1;
+                EmitVectorExtractZx(Context, Index < Half ? Op.Rn : Op.Rm, Elem + 0, Op.Size);
+                EmitVectorExtractZx(Context, Index < Half ? Op.Rn : Op.Rm, Elem + 1, Op.Size);
+
+                Context.Emit(OpCodes.Add);
+
+                //NOTE(review): Rd is updated in place; if Rd == Rm, later iterations read overwritten elements - confirm.
+                EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
+            }
+
+            if (Op.RegisterSize == ARegisterSize.SIMD64)
+            {
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
         }
 
-        public static void Addv_V(AILEmitterCtx Context) => EmitVectorAddv(Context);
+        public static void Addv_V(AILEmitterCtx Context)
+        {
+            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
-        public static void And_V(AILEmitterCtx Context) => EmitVectorBinaryZx(Context, OpCodes.And);
+            //ADDV sums all elements of Rn and stores the result in
+            //element 0 of Rd. Every element has to be added exactly
+            //once, so the vector is walked a single time, at the
+            //element size encoded in the opcode. (Re-reading the
+            //register at each wider size as well would add the same
+            //bits into the sum more than once.)
+            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+
+            //Start the running sum with element 0...
+            EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size);
+
+            //...and fold each remaining element into it.
+            for (int Index = 1; Index < (Bytes >> Op.Size); Index++)
+            {
+                EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
+
+                Context.Emit(OpCodes.Add);
+            }
+
+            //All reads of Rn are done and the sum is on the IL stack,
+            //so the destination can be cleared before it is stored.
+            EmitVectorZeroLower(Context, Op.Rd);
+            EmitVectorZeroUpper(Context, Op.Rd);
+
+            //Store the sum as element 0 of Rd.
+            EmitVectorInsert(Context, Op.Rd, 0, Op.Size);
+        }
+
+        public static void And_V(AILEmitterCtx Context)
+        {
+            EmitVectorBinaryZx(Context, () => Context.Emit(OpCodes.And));
+        }
+
+        public static void Bic_V(AILEmitterCtx Context)
+        {
+            EmitVectorBinaryZx(Context, () =>
+            {
+                Context.Emit(OpCodes.Not);
+                Context.Emit(OpCodes.And);
+            });
+        }
 
-        public static void Bic_V(AILEmitterCtx Context) => EmitVectorBic(Context);
         public static void Bic_Vi(AILEmitterCtx Context)
         {
-            AOpCodeSimdImm Op = (AOpCodeSimdImm)Context.CurrOp;
-
-            Context.EmitLdvec(Op.Rd);
-            Context.EmitLdc_I8(Op.Imm);
-            Context.EmitLdc_I4(Op.Size);
-
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Bic_Vi64),
-                nameof(ASoftFallback.Bic_Vi128));
-
-            Context.EmitStvec(Op.Rd);
+            EmitVectorImmBinary(Context, () =>
+            {
+                Context.Emit(OpCodes.Not);
+                Context.Emit(OpCodes.And);
+            });
         }
 
-        public static void Bsl_V(AILEmitterCtx Context) => EmitVectorBsl(Context);
+        public static void Bsl_V(AILEmitterCtx Context)
+        {
+            EmitVectorTernaryZx(Context, () =>
+            {
+                Context.EmitSttmp();
+                Context.EmitLdtmp();
 
-        public static void Cmeq_V(AILEmitterCtx Context) => EmitVectorCmp(Context, OpCodes.Beq_S);
-        public static void Cmge_V(AILEmitterCtx Context) => EmitVectorCmp(Context, OpCodes.Bge_S);
-        public static void Cmgt_V(AILEmitterCtx Context) => EmitVectorCmp(Context, OpCodes.Bgt_S);
-        public static void Cmhi_V(AILEmitterCtx Context) => EmitVectorCmp(Context, OpCodes.Bgt_Un_S);
-        public static void Cmhs_V(AILEmitterCtx Context) => EmitVectorCmp(Context, OpCodes.Bge_Un_S);
-        public static void Cmle_V(AILEmitterCtx Context) => EmitVectorCmp(Context, OpCodes.Ble_S);
-        public static void Cmlt_V(AILEmitterCtx Context) => EmitVectorCmp(Context, OpCodes.Blt_S);
+                Context.Emit(OpCodes.Xor);
+                Context.Emit(OpCodes.And);
+
+                Context.EmitLdtmp();
+
+                Context.Emit(OpCodes.Xor);
+            });
+        }
+
+        public static void Cmeq_V(AILEmitterCtx Context)
+        {
+            EmitVectorCmp(Context, OpCodes.Beq_S);
+        }
+
+        public static void Cmge_V(AILEmitterCtx Context)
+        {
+            EmitVectorCmp(Context, OpCodes.Bge_S);
+        }
+
+        public static void Cmgt_V(AILEmitterCtx Context)
+        {
+            EmitVectorCmp(Context, OpCodes.Bgt_S);
+        }
+
+        public static void Cmhi_V(AILEmitterCtx Context)
+        {
+            EmitVectorCmp(Context, OpCodes.Bgt_Un_S);
+        }
+
+        public static void Cmhs_V(AILEmitterCtx Context)
+        {
+            EmitVectorCmp(Context, OpCodes.Bge_Un_S);
+        }
+
+        public static void Cmle_V(AILEmitterCtx Context)
+        {
+            EmitVectorCmp(Context, OpCodes.Ble_S);
+        }
+
+        public static void Cmlt_V(AILEmitterCtx Context)
+        {
+            EmitVectorCmp(Context, OpCodes.Blt_S);
+        }
 
         public static void Cnt_V(AILEmitterCtx Context)
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
-            Context.EmitLdvec(Op.Rn);
+            for (int Index = 0; Index < (Op.RegisterSize == ARegisterSize.SIMD128 ? 16 : 8); Index++)
+            {
+                EmitVectorExtractZx(Context, Op.Rn, Index, 0); //Size 0: one byte per iteration.
 
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Cnt64),
-                nameof(ASoftFallback.Cnt128));
+                Context.Emit(OpCodes.Conv_U1);
+                ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8));
+                Context.Emit(OpCodes.Conv_U8);
+                EmitVectorInsert(Context, Op.Rd, Index, 0); //Bit count of byte Index goes to byte Index of Rd.
+            }
 
-            Context.EmitStvec(Op.Rd);
+            if (Op.RegisterSize == ARegisterSize.SIMD64) //Same guard as Addp_V, Dup_Gp and Dup_V.
+            {
+                EmitVectorZeroUpper(Context, Op.Rd); //Only bytes 0..7 were written above.
+            }
         }
 
         public static void Dup_Gp(AILEmitterCtx Context)
         {
             AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;
 
-            Context.EmitLdintzr(Op.Rn);
-            Context.EmitLdc_I4(Op.Size);
+            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
 
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Dup_Gp64),
-                nameof(ASoftFallback.Dup_Gp128));
+            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+            {
+                Context.EmitLdintzr(Op.Rn);
 
-            Context.EmitStvec(Op.Rd);
+                EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
+            }
+
+            if (Op.RegisterSize == ARegisterSize.SIMD64)
+            {
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
         }
 
         public static void Dup_V(AILEmitterCtx Context)
         {
             AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;
 
-            Context.EmitLdvec(Op.Rn);
-            Context.EmitLdc_I4(Op.DstIndex);
-            Context.EmitLdc_I4(Op.Size);
+            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
 
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Dup_V64),
-                nameof(ASoftFallback.Dup_V128));
+            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+            {
+                EmitVectorExtractZx(Context, Op.Rn, Op.DstIndex, Op.Size);
 
-            Context.EmitStvec(Op.Rd);
+                EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
+            }
+
+            if (Op.RegisterSize == ARegisterSize.SIMD64)
+            {
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
         }
 
-        public static void Eor_V(AILEmitterCtx Context) => EmitVectorBinaryZx(Context, OpCodes.Xor);
+        public static void Eor_V(AILEmitterCtx Context)
+        {
+            EmitVectorBinaryZx(Context, () => Context.Emit(OpCodes.Xor));
+        }
 
-        public static void Fadd_V(AILEmitterCtx Context) => EmitVectorBinaryFOp(Context, OpCodes.Add);
+        public static void Fadd_V(AILEmitterCtx Context)
+        {
+            EmitVectorBinaryF(Context, () => Context.Emit(OpCodes.Add));
+        }
 
-        public static void Fcvtzs_V(AILEmitterCtx Context) => EmitVectorFcvts(Context);
-        public static void Fcvtzu_V(AILEmitterCtx Context) => EmitVectorFcvtu(Context);
+        public static void Fcvtzs_V(AILEmitterCtx Context)
+        {
+            EmitVectorFcvt(Context, Signed: true);
+        }
+
+        public static void Fcvtzu_V(AILEmitterCtx Context)
+        {
+            EmitVectorFcvt(Context, Signed: false);
+        }
 
         public static void Fmla_V(AILEmitterCtx Context)
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
-            Context.EmitLdvec(Op.Rd);
-            Context.EmitLdvec(Op.Rn);
-            Context.EmitLdvec(Op.Rm);
-            Context.EmitLdc_I4(Op.SizeF);
-
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Fmla64),
-                nameof(ASoftFallback.Fmla128));
-
-            Context.EmitStvec(Op.Rd);
+            EmitVectorTernaryF(Context, () =>
+            {
+                Context.Emit(OpCodes.Mul);
+                Context.Emit(OpCodes.Add);
+            });
         }
 
-        public static void Fmla_Vs(AILEmitterCtx Context)
+        public static void Fmla_Ve(AILEmitterCtx Context)
         {
-            AOpCodeSimdRegElem Op = (AOpCodeSimdRegElem)Context.CurrOp;
-
-            Context.EmitLdvec(Op.Rd);
-            Context.EmitLdvec(Op.Rn);
-            Context.EmitLdvec(Op.Rm);
-            Context.EmitLdc_I4(Op.Index);
-            Context.EmitLdc_I4(Op.SizeF);
-
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Fmla_Ve64),
-                nameof(ASoftFallback.Fmla_Ve128));
-
-            Context.EmitStvec(Op.Rd);
+            EmitVectorTernaryByElemF(Context, () =>
+            {
+                Context.Emit(OpCodes.Mul);
+                Context.Emit(OpCodes.Add);
+            });
         }
 
         public static void Fmov_V(AILEmitterCtx Context)
@@ -154,163 +261,176 @@ namespace ChocolArm64.Instruction
             Context.EmitStvec(Op.Rd);
         }
 
-        public static void Fmul_V(AILEmitterCtx Context) => EmitVectorBinaryFOp(Context, OpCodes.Mul);
-
-        public static void Fmul_Vs(AILEmitterCtx Context)
+        public static void Fmul_V(AILEmitterCtx Context)
         {
-            AOpCodeSimdRegElem Op = (AOpCodeSimdRegElem)Context.CurrOp;
-
-            Context.EmitLdvec(Op.Rn);
-            Context.EmitLdvec(Op.Rm);
-            Context.EmitLdc_I4(Op.Index);
-            Context.EmitLdc_I4(Op.SizeF);
-
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Fmul_Ve64),
-                nameof(ASoftFallback.Fmul_Ve128));
-
-            Context.EmitStvec(Op.Rd);
+            EmitVectorBinaryF(Context, () => Context.Emit(OpCodes.Mul));
+        }
+    
+        public static void Fmul_Ve(AILEmitterCtx Context)
+        {
+            EmitVectorBinaryByElemF(Context, () => Context.Emit(OpCodes.Mul));
         }
 
-        public static void Fsub_V(AILEmitterCtx Context) => EmitVectorBinaryFOp(Context, OpCodes.Sub);
+        public static void Fsub_V(AILEmitterCtx Context)
+        {
+            EmitVectorBinaryF(Context, () => Context.Emit(OpCodes.Sub));
+        }
 
         public static void Ins_Gp(AILEmitterCtx Context)
         {
             AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;
 
-            Context.EmitLdvec(Op.Rd);
             Context.EmitLdintzr(Op.Rn);
-            Context.EmitLdc_I4(Op.DstIndex);
-            Context.EmitLdc_I4(Op.Size);
 
-            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Ins_Gp));
-
-            Context.EmitStvec(Op.Rd);
+            EmitVectorInsert(Context, Op.Rd, Op.DstIndex, Op.Size);
         }
 
         public static void Ins_V(AILEmitterCtx Context)
         {
             AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;
 
-            Context.EmitLdvec(Op.Rd);
-            Context.EmitLdvec(Op.Rn);
-            Context.EmitLdc_I4(Op.SrcIndex);
-            Context.EmitLdc_I4(Op.DstIndex);
-            Context.EmitLdc_I4(Op.Size);
+            EmitVectorExtractZx(Context, Op.Rn, Op.SrcIndex, Op.Size);
 
-            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Ins_V));
-
-            Context.EmitStvec(Op.Rd);
+            EmitVectorInsert(Context, Op.Rd, Op.DstIndex, Op.Size);
         }
 
-        public static void Ld__Vms(AILEmitterCtx Context) => EmitSimdMemMs(Context, IsLoad: true);
-        public static void Ld__Vss(AILEmitterCtx Context) => EmitSimdMemSs(Context, IsLoad: true);
-
-        public static void Mla_V(AILEmitterCtx Context) => EmitVectorMla(Context);
-
-        public static void Movi_V(AILEmitterCtx Context) => EmitMovi_V(Context, false);
-
-        public static void Mul_V(AILEmitterCtx Context) => EmitVectorBinaryZx(Context, OpCodes.Mul);
-
-        public static void Mvni_V(AILEmitterCtx Context) => EmitMovi_V(Context, true);
-
-        private static void EmitMovi_V(AILEmitterCtx Context, bool Not)
+        public static void Ld__Vms(AILEmitterCtx Context)
         {
-            AOpCodeSimdImm Op = (AOpCodeSimdImm)Context.CurrOp;
-
-            Context.EmitLdc_I8(Not ? ~Op.Imm : Op.Imm);
-            Context.EmitLdc_I4(Op.Size);
-
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Dup_Gp64),
-                nameof(ASoftFallback.Dup_Gp128));
-
-            Context.EmitStvec(Op.Rd);
+            EmitSimdMemMs(Context, IsLoad: true);
         }
 
-        public static void Neg_V(AILEmitterCtx Context) => EmitVectorUnarySx(Context, OpCodes.Neg);
+        public static void Ld__Vss(AILEmitterCtx Context)
+        {
+            EmitSimdMemSs(Context, IsLoad: true);
+        }
 
-        public static void Not_V(AILEmitterCtx Context) => EmitVectorUnaryZx(Context, OpCodes.Not);
+        public static void Mla_V(AILEmitterCtx Context)
+        {
+            EmitVectorTernaryZx(Context, () =>
+            {
+                Context.Emit(OpCodes.Mul);
+                Context.Emit(OpCodes.Add);
+            });
+        }
 
-        public static void Orr_V(AILEmitterCtx Context) => EmitVectorBinaryZx(Context, OpCodes.Or);
+        public static void Movi_V(AILEmitterCtx Context)
+        {
+            EmitVectorImmUnary(Context, () => { });
+        }
+
+        public static void Mul_V(AILEmitterCtx Context)
+        {
+            EmitVectorBinaryZx(Context, () => Context.Emit(OpCodes.Mul));
+        }
+
+        public static void Mvni_V(AILEmitterCtx Context)
+        {
+            EmitVectorImmUnary(Context, () => Context.Emit(OpCodes.Not));
+        }
+
+        public static void Neg_V(AILEmitterCtx Context)
+        {
+            EmitVectorUnarySx(Context, () => Context.Emit(OpCodes.Neg));
+        }
+
+        public static void Not_V(AILEmitterCtx Context)
+        {
+            EmitVectorUnaryZx(Context, () => Context.Emit(OpCodes.Not));
+        }
+
+        public static void Orr_V(AILEmitterCtx Context)
+        {
+            EmitVectorBinaryZx(Context, () => Context.Emit(OpCodes.Or));
+        }
 
         public static void Orr_Vi(AILEmitterCtx Context)
         {
-            AOpCodeSimdImm Op = (AOpCodeSimdImm)Context.CurrOp;
-
-            Context.EmitLdvec(Op.Rd);
-            Context.EmitLdc_I8(Op.Imm);
-            Context.EmitLdc_I4(Op.Size);
-
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Orr_Vi64),
-                nameof(ASoftFallback.Orr_Vi128));
-
-            Context.EmitStvec(Op.Rd);
-        }       
+            EmitVectorImmBinary(Context, () => Context.Emit(OpCodes.Or));
+        }
 
         public static void Saddw_V(AILEmitterCtx Context)
         {
-            AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
-
-            Context.EmitLdvec(Op.Rn);
-            Context.EmitLdvec(Op.Rm);
-            Context.EmitLdc_I4(Op.Size);
-
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Saddw),
-                nameof(ASoftFallback.Saddw2));
-
-            Context.EmitStvec(Op.Rd);
+            EmitVectorWidenBinarySx(Context, () => Context.Emit(OpCodes.Add));
         }
 
-        public static void Scvtf_V(AILEmitterCtx Context) => EmitVectorScvtf(Context);
+        public static void Scvtf_V(AILEmitterCtx Context)
+        {
+            EmitVectorCvtf(Context, Signed: true);
+        }
 
         public static void Shl_V(AILEmitterCtx Context)
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            EmitVectorImmBinaryZx(Context, OpCodes.Shl, Op.Imm - (8 << Op.Size));
+            int Shift = Op.Imm - (8 << Op.Size);
+
+            EmitVectorShImmBinaryZx(Context, () => Context.Emit(OpCodes.Shl), Shift);
         }
 
         public static void Shrn_V(AILEmitterCtx Context)
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            EmitVectorImmNarrowBinaryZx(Context, OpCodes.Shr_Un, (8 << (Op.Size + 1)) - Op.Imm);
+            int Shift = (8 << (Op.Size + 1)) - Op.Imm;
+
+            EmitVectorShImmNarrowBinaryZx(Context, () => Context.Emit(OpCodes.Shr_Un), Shift);
         }
 
-        public static void Smax_V(AILEmitterCtx Context) => EmitVectorSmax(Context);
-        public static void Smin_V(AILEmitterCtx Context) => EmitVectorSmin(Context);
+        public static void Smax_V(AILEmitterCtx Context)
+        {
+            Type[] Types = new Type[] { typeof(long), typeof(long) };
 
-        public static void Sshl_V(AILEmitterCtx Context) => EmitVectorSshl(Context);
+            MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Max), Types);
+
+            EmitVectorBinarySx(Context, () => Context.EmitCall(MthdInfo));
+        }
+
+        public static void Smin_V(AILEmitterCtx Context)
+        {
+            Type[] Types = new Type[] { typeof(long), typeof(long) };
+
+            MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Min), Types);
+
+            EmitVectorBinarySx(Context, () => Context.EmitCall(MthdInfo));
+        }
+
+        public static void Sshl_V(AILEmitterCtx Context)
+        {
+            EmitVectorShl(Context, Signed: true);
+        }
 
         public static void Sshll_V(AILEmitterCtx Context)
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            Context.EmitLdvec(Op.Rn);
-            Context.EmitLdc_I4(Op.Imm - (8 << Op.Size));
-            Context.EmitLdc_I4(Op.Size);
+            int Shift = Op.Imm - (8 << Op.Size);
 
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Sshll),
-                nameof(ASoftFallback.Sshll2));
-
-            Context.EmitStvec(Op.Rd);
+            EmitVectorShImmWidenBinarySx(Context, () => Context.Emit(OpCodes.Shl), Shift);
         }
 
         public static void Sshr_V(AILEmitterCtx Context)
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            EmitVectorImmBinarySx(Context, OpCodes.Shr, (8 << (Op.Size + 1)) - Op.Imm);
+            int Shift = (8 << (Op.Size + 1)) - Op.Imm;
+
+            EmitVectorShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), Shift);
         }
 
-        public static void St__Vms(AILEmitterCtx Context) => EmitSimdMemMs(Context, IsLoad: false);
-        public static void St__Vss(AILEmitterCtx Context) => EmitSimdMemSs(Context, IsLoad: false);
+        public static void St__Vms(AILEmitterCtx Context)
+        {
+            EmitSimdMemMs(Context, IsLoad: false);
+        }
 
-        public static void Sub_V(AILEmitterCtx Context) => EmitVectorBinaryZx(Context, OpCodes.Sub);
+        public static void St__Vss(AILEmitterCtx Context)
+        {
+            EmitSimdMemSs(Context, IsLoad: false);
+        }
+
+        public static void Sub_V(AILEmitterCtx Context)
+        {
+            EmitVectorBinaryZx(Context, () => Context.Emit(OpCodes.Sub));
+        }
 
         public static void Tbl_V(AILEmitterCtx Context)
         {
@@ -351,81 +471,45 @@ namespace ChocolArm64.Instruction
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
-            Context.EmitLdvec(Op.Rn);
-            Context.EmitLdc_I4(Op.Size);
+            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
 
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Uaddlv64),
-                nameof(ASoftFallback.Uaddlv128));
+            EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size);
 
-            Context.EmitStvec(Op.Rd);
+            for (int Index = 1; Index < (Bytes >> Op.Size); Index++)
+            {
+                EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
+
+                Context.Emit(OpCodes.Add);
+            }
+
+            EmitVectorZeroLower(Context, Op.Rd);
+            EmitVectorZeroUpper(Context, Op.Rd);
+
+            EmitVectorInsert(Context, Op.Rd, 0, Op.Size);
         }
 
         public static void Uaddw_V(AILEmitterCtx Context)
         {
-            AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
-
-            Context.EmitLdvec(Op.Rn);
-            Context.EmitLdvec(Op.Rm);
-            Context.EmitLdc_I4(Op.Size);
-
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Uaddw),
-                nameof(ASoftFallback.Uaddw2));
-
-            Context.EmitStvec(Op.Rd);
+            EmitVectorWidenBinaryZx(Context, () => Context.Emit(OpCodes.Add));
         }
 
         public static void Ucvtf_V(AILEmitterCtx Context)
         {
-            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
-
-            Context.EmitLdvec(Op.Rn);
-
-            if (Op.Size == 0)
-            {
-                ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Ucvtf_V_F));
-            }
-            else if (Op.Size == 1)
-            {
-                ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Ucvtf_V_D));
-            }
-            else
-            {
-                throw new InvalidOperationException();
-            }
-
-            Context.EmitStvec(Op.Rd);
+            EmitVectorCvtf(Context, Signed: false);
         }
 
-        public static void Umov_S(AILEmitterCtx Context)
+        public static void Ushl_V(AILEmitterCtx Context)
         {
-            AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;
-
-            Context.EmitLdvec(Op.Rn);
-            Context.EmitLdc_I4(Op.DstIndex);
-            Context.EmitLdc_I4(Op.Size);
-
-            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.ExtractVec));
-
-            Context.EmitStintzr(Op.Rd);
+            EmitVectorShl(Context, Signed: false);
         }
 
-        public static void Ushl_V(AILEmitterCtx Context) => EmitVectorUshl(Context);
-
         public static void Ushll_V(AILEmitterCtx Context)
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            Context.EmitLdvec(Op.Rn);
-            Context.EmitLdc_I4(Op.Imm - (8 << Op.Size));
-            Context.EmitLdc_I4(Op.Size);
+            int Shift = Op.Imm - (8 << Op.Size);
 
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Ushll),
-                nameof(ASoftFallback.Ushll2));
-
-            Context.EmitStvec(Op.Rd);
+            EmitVectorShImmWidenBinaryZx(Context, () => Context.Emit(OpCodes.Shl), Shift);
         }
 
         public static void Ushr_V(AILEmitterCtx Context)
@@ -438,33 +522,97 @@ namespace ChocolArm64.Instruction
             EmitVectorShr(Context, ShrFlags.Accumulate);
         }
 
+        [Flags]
+        private enum ShrFlags
+        {
+            None       = 0,
+            Signed     = 1 << 0,
+            Rounding   = 1 << 1,
+            Accumulate = 1 << 2
+        }
+
+        private static void EmitVectorShr(AILEmitterCtx Context, ShrFlags Flags)
+        {
+            AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+
+            int Shift = (8 << (Op.Size + 1)) - Op.Imm;
+
+            if (Flags.HasFlag(ShrFlags.Accumulate))
+            {
+                Action Emit = () =>
+                {
+                    Context.EmitLdc_I4(Shift);
+
+                    Context.Emit(OpCodes.Shr_Un);
+                    Context.Emit(OpCodes.Add);
+                };
+
+                EmitVectorOp(Context, Emit, OperFlags.RdRn, Signed: false);
+            }
+            else
+            {
+                EmitVectorUnaryZx(Context, () =>
+                {
+                    Context.EmitLdc_I4(Shift);
+
+                    Context.Emit(OpCodes.Shr_Un);
+                });
+            }
+        }
+
         public static void Uzp1_V(AILEmitterCtx Context)
+        {
+            EmitVectorUnzip(Context, Part: 0);
+        }
+
+        public static void Uzp2_V(AILEmitterCtx Context)
+        {
+            EmitVectorUnzip(Context, Part: 1);
+        }
+
+        private static void EmitVectorUnzip(AILEmitterCtx Context, int Part)
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
-            Context.EmitLdvec(Op.Rn);
-            Context.EmitLdvec(Op.Rm);
-            Context.EmitLdc_I4(Op.Size);
+            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
 
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Uzp1_V64),
-                nameof(ASoftFallback.Uzp1_V128));
+            int Elems = Bytes >> Op.Size;
+            int Half  = Elems >> 1;
 
-            Context.EmitStvec(Op.Rd);
+            for (int Index = 0; Index < Elems; Index++)
+            {
+                int Elem = Part + ((Index & (Half - 1)) << 1);
+                
+                EmitVectorExtractZx(Context, Index < Half ? Op.Rn : Op.Rm, Elem, Op.Size);
+
+                EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
+            }
+
+            if (Op.RegisterSize == ARegisterSize.SIMD64)
+            {
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
         }
 
         public static void Xtn_V(AILEmitterCtx Context)
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
-            Context.EmitLdvec(Op.Rn);
-            Context.EmitLdc_I4(Op.Size);
+            int Elems = 8 >> Op.Size;
 
-            ASoftFallback.EmitCall(Context,
-                nameof(ASoftFallback.Xtn),
-                nameof(ASoftFallback.Xtn2));
+            int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
 
-            Context.EmitStvec(Op.Rd);
+            for (int Index = 0; Index < Elems; Index++)
+            {
+                EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1);
+
+                EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size);
+            }
+
+            if (Part == 0)
+            {
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
         }
 
         private static void EmitSimdMemMs(AILEmitterCtx Context, bool IsLoad)
@@ -481,9 +629,6 @@ namespace ChocolArm64.Instruction
 
                 if (IsLoad)
                 {
-                    Context.EmitLdvec(Rtt);
-                    Context.EmitLdc_I4(Elem);
-                    Context.EmitLdc_I4(Op.Size);
                     Context.EmitLdarg(ATranslatedSub.MemoryArgIdx);
                     Context.EmitLdint(Op.Rn);
                     Context.EmitLdc_I8(Offset);
@@ -492,9 +637,7 @@ namespace ChocolArm64.Instruction
 
                     EmitReadZxCall(Context, Op.Size);
 
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.InsertVec));
-
-                    Context.EmitStvec(Rtt);
+                    EmitVectorInsert(Context, Rtt, Elem, Op.Size);
 
                     if (Op.RegisterSize == ARegisterSize.SIMD64 && Elem == Op.Elems - 1)
                     {
@@ -509,11 +652,7 @@ namespace ChocolArm64.Instruction
 
                     Context.Emit(OpCodes.Add);
 
-                    Context.EmitLdvec(Rtt);
-                    Context.EmitLdc_I4(Elem);
-                    Context.EmitLdc_I4(Op.Size);
-
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.ExtractVec));
+                    EmitVectorExtractZx(Context, Rtt, Elem, Op.Size);
 
                     EmitWriteCall(Context, Op.Size);
                 }
@@ -523,20 +662,7 @@ namespace ChocolArm64.Instruction
 
             if (Op.WBack)
             {
-                Context.EmitLdint(Op.Rn);
-
-                if (Op.Rm != ARegisters.ZRIndex)
-                {
-                    Context.EmitLdint(Op.Rm);
-                }
-                else
-                {
-                    Context.EmitLdc_I8(Offset);
-                }
-
-                Context.Emit(OpCodes.Add);
-
-                Context.EmitStint(Op.Rn);
+                EmitSimdMemWBack(Context, Offset);
             }
         }
 
@@ -554,9 +680,6 @@ namespace ChocolArm64.Instruction
 
                 if (IsLoad)
                 {
-                    Context.EmitLdvec(Rt);
-                    Context.EmitLdc_I4(Op.Index);
-                    Context.EmitLdc_I4(Op.Size);
                     Context.EmitLdarg(ATranslatedSub.MemoryArgIdx);
                     Context.EmitLdint(Op.Rn);
                     Context.EmitLdc_I8(Offset);
@@ -565,9 +688,7 @@ namespace ChocolArm64.Instruction
 
                     EmitReadZxCall(Context, Op.Size);
 
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.InsertVec));
-
-                    Context.EmitStvec(Rt);
+                    EmitVectorInsert(Context, Rt, Op.Index, Op.Size);
 
                     if (Op.RegisterSize == ARegisterSize.SIMD64)
                     {
@@ -582,11 +703,7 @@ namespace ChocolArm64.Instruction
 
                     Context.Emit(OpCodes.Add);
 
-                    Context.EmitLdvec(Rt);
-                    Context.EmitLdc_I4(Op.Index);
-                    Context.EmitLdc_I4(Op.Size);
-
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.ExtractVec));
+                    EmitVectorExtractZx(Context, Rt, Op.Index, Op.Size);
 
                     EmitWriteCall(Context, Op.Size);
                 }
@@ -596,105 +713,73 @@ namespace ChocolArm64.Instruction
 
             if (Op.WBack)
             {
-                Context.EmitLdint(Op.Rn);
-
-                if (Op.Rm != ARegisters.ZRIndex)
-                {
-                    Context.EmitLdint(Op.Rm);
-                }
-                else
-                {
-                    Context.EmitLdc_I8(Offset);
-                }
-
-                Context.Emit(OpCodes.Add);
-
-                Context.EmitStint(Op.Rn);
+                EmitSimdMemWBack(Context, Offset);
             }
         }
 
-        private static void EmitVectorAddv(AILEmitterCtx Context)
+        private static void EmitSimdMemWBack(AILEmitterCtx Context, int Offset)
+        {
+            AOpCodeMemReg Op = (AOpCodeMemReg)Context.CurrOp;
+
+            Context.EmitLdint(Op.Rn);
+
+            if (Op.Rm != ARegisters.ZRIndex)
+            {
+                Context.EmitLdint(Op.Rm);
+            }
+            else
+            {
+                Context.EmitLdc_I8(Offset);
+            }
+
+            Context.Emit(OpCodes.Add);
+
+            Context.EmitStint(Op.Rn);
+        }
+
+        private static void EmitVectorCmp(AILEmitterCtx Context, OpCode ILOp)
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
             int Bytes = Context.CurrOp.GetBitsCount() >> 3;
 
-            EmitVectorZeroLower(Context, Op.Rd);
-            EmitVectorZeroUpper(Context, Op.Rd);
+            ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size));
 
-            Context.EmitLdvec(Op.Rd);
-            Context.EmitLdc_I4(0);
-            Context.EmitLdc_I4(Op.Size);
-
-            EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size);
-
-            for (int Index = 1; Index < (Bytes >> Op.Size); Index++)
+            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
             {
-                EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
+                EmitVectorExtractSx(Context, Op.Rn, Index, Op.Size);
 
-                Context.Emit(OpCodes.Add);
+                if (Op is AOpCodeSimdReg BinOp)
+                {
+                    EmitVectorExtractSx(Context, BinOp.Rm, Index, Op.Size);
+                }
+                else
+                {
+                    Context.EmitLdc_I8(0);
+                }
+
+                AILLabel LblTrue = new AILLabel();
+                AILLabel LblEnd  = new AILLabel();
+
+                Context.Emit(ILOp, LblTrue);
+
+                EmitVectorInsert(Context, Op.Rd, Index, Op.Size, 0);
+
+                Context.Emit(OpCodes.Br_S, LblEnd);
+
+                Context.MarkLabel(LblTrue);
+
+                EmitVectorInsert(Context, Op.Rd, Index, Op.Size, (long)SzMask);
+
+                Context.MarkLabel(LblEnd);
             }
 
-            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.InsertVec));
-
-            Context.EmitStvec(Op.Rd);
-        }
-
-        private static void EmitVectorBic(AILEmitterCtx Context)
-        {
-            EmitVectorBinaryZx(Context, () =>
+            if (Op.RegisterSize == ARegisterSize.SIMD64)
             {
-                Context.Emit(OpCodes.Not);
-                Context.Emit(OpCodes.And);
-            });
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
         }
 
-        private static void EmitVectorBsl(AILEmitterCtx Context)
-        {
-            EmitVectorTernaryZx(Context, () =>
-            {
-                Context.EmitSttmp();
-                Context.EmitLdtmp();
-
-                Context.Emit(OpCodes.Xor);
-                Context.Emit(OpCodes.And);
-
-                Context.EmitLdtmp();
-
-                Context.Emit(OpCodes.Xor);
-            });
-        }
-
-        private static void EmitVectorMla(AILEmitterCtx Context)
-        {
-            EmitVectorTernaryZx(Context, () =>
-            {
-                Context.Emit(OpCodes.Mul);
-                Context.Emit(OpCodes.Add);
-            });
-        }
-
-        private static void EmitVectorSmax(AILEmitterCtx Context)
-        {
-            Type[] Types = new Type[] { typeof(long), typeof(long) };
-
-            MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Max), Types);
-
-            EmitVectorBinarySx(Context, () => Context.EmitCall(MthdInfo));
-        }
-
-        private static void EmitVectorSmin(AILEmitterCtx Context)
-        {
-            Type[] Types = new Type[] { typeof(long), typeof(long) };
-
-            MethodInfo MthdInfo = typeof(Math).GetMethod(nameof(Math.Min), Types);
-
-            EmitVectorBinarySx(Context, () => Context.EmitCall(MthdInfo));
-        }
-
-        private static void EmitVectorSshl(AILEmitterCtx Context) => EmitVectorShl(Context, true);
-        private static void EmitVectorUshl(AILEmitterCtx Context) => EmitVectorShl(Context, false);
-
         private static void EmitVectorShl(AILEmitterCtx Context, bool Signed)
         {
             //This instruction shifts the value on vector A by the number of bits
@@ -758,64 +843,7 @@ namespace ChocolArm64.Instruction
             }
         }
 
-        private enum ShrFlags
-        {
-            None       = 0,
-            Signed     = 1 << 0,
-            Rounding   = 1 << 1,
-            Accumulate = 1 << 2
-        }
-
-        private static void EmitVectorShr(AILEmitterCtx Context, ShrFlags Flags)
-        {
-            AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
-
-            int Shift = (8 << (Op.Size + 1)) - Op.Imm;
-
-            if (Flags.HasFlag(ShrFlags.Accumulate))
-            {
-                Action Emit = () =>
-                {
-                    Context.EmitLdc_I4(Shift);
-
-                    Context.Emit(OpCodes.Shr_Un);
-                    Context.Emit(OpCodes.Add);
-                };
-
-                EmitVectorOp(Context, Emit, OperFlags.RdRn, Signed: false);
-            }
-            else
-            {
-                EmitVectorUnaryZx(Context, () =>
-                {
-                    Context.EmitLdc_I4(Shift);
-
-                    Context.Emit(OpCodes.Shr_Un);
-                });
-            }
-        }
-
-        private static void EmitVectorFcvts(AILEmitterCtx Context)
-        {
-            EmitVectorFcvtOp(Context, Signed: true);
-        }
-
-        private static void EmitVectorFcvtu(AILEmitterCtx Context)
-        {
-            EmitVectorFcvtOp(Context, Signed: false);
-        }
-
-        private static void EmitVectorScvtf(AILEmitterCtx Context)
-        {
-            EmitVectorCvtfOp(Context, Signed: true);
-        }
-
-        private static void EmitVectorUcvtf(AILEmitterCtx Context)
-        {
-            EmitVectorCvtfOp(Context, Signed: false);
-        }
-
-        private static void EmitVectorFcvtOp(AILEmitterCtx Context, bool Signed)
+        private static void EmitVectorFcvt(AILEmitterCtx Context, bool Signed)
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
@@ -854,7 +882,7 @@ namespace ChocolArm64.Instruction
             }
         }
 
-        private static void EmitVectorCvtfOp(AILEmitterCtx Context, bool Signed)
+        private static void EmitVectorCvtf(AILEmitterCtx Context, bool Signed)
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
 
@@ -905,12 +933,43 @@ namespace ChocolArm64.Instruction
             return 0;
         }
 
-        private static void EmitVectorBinaryFOp(AILEmitterCtx Context, OpCode ILOp)
+        [Flags]
+        private enum OperFlags
         {
-            EmitVectorBinaryFOp(Context, () => Context.Emit(ILOp));
+            Rd = 1 << 0,
+            Rn = 1 << 1,
+            Rm = 1 << 2,
+
+            RnRm   = Rn | Rm,
+            RdRn   = Rd | Rn,
+            RdRnRm = Rd | Rn | Rm
         }
 
-        private static void EmitVectorBinaryFOp(AILEmitterCtx Context, Action Emit)
+        private static void EmitVectorBinaryF(AILEmitterCtx Context, Action Emit)
+        {
+            EmitVectorFOp(Context, Emit, OperFlags.RnRm);
+        }
+
+        private static void EmitVectorTernaryF(AILEmitterCtx Context, Action Emit)
+        {
+            EmitVectorFOp(Context, Emit, OperFlags.RdRnRm);
+        }
+
+        private static void EmitVectorBinaryByElemF(AILEmitterCtx Context, Action Emit)
+        {
+            AOpCodeSimdRegElem Op = (AOpCodeSimdRegElem)Context.CurrOp;
+
+            EmitVectorFOp(Context, Emit, OperFlags.RnRm, Op.Index);
+        }
+
+        private static void EmitVectorTernaryByElemF(AILEmitterCtx Context, Action Emit)
+        {
+            AOpCodeSimdRegElem Op = (AOpCodeSimdRegElem)Context.CurrOp;
+
+            EmitVectorFOp(Context, Emit, OperFlags.RdRnRm, Op.Index);
+        }
+
+        private static void EmitVectorFOp(AILEmitterCtx Context, Action Emit, OperFlags Opers, int Elem = -1)
         {
             AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
 
@@ -920,8 +979,27 @@ namespace ChocolArm64.Instruction
 
             for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++)
             {
-                EmitVectorExtractF(Context, Op.Rn, Index, SizeF);
-                EmitVectorExtractF(Context, Op.Rm, Index, SizeF);
+                if (Opers.HasFlag(OperFlags.Rd))
+                {
+                    EmitVectorExtractF(Context, Op.Rd, Index, SizeF);
+                }
+
+                if (Opers.HasFlag(OperFlags.Rn))
+                {
+                    EmitVectorExtractF(Context, Op.Rn, Index, SizeF);
+                }
+
+                if (Opers.HasFlag(OperFlags.Rm))
+                {
+                    if (Elem != -1)
+                    {
+                        EmitVectorExtractF(Context, Op.Rm, Elem, SizeF);
+                    }
+                    else
+                    {
+                        EmitVectorExtractF(Context, Op.Rm, Index, SizeF);
+                    }
+                }
 
                 Emit();
 
@@ -934,21 +1012,6 @@ namespace ChocolArm64.Instruction
             }
         }
 
-        private static void EmitVectorUnarySx(AILEmitterCtx Context, OpCode ILOp)
-        {
-            EmitVectorUnarySx(Context, () => Context.Emit(ILOp));
-        }
-
-        private static void EmitVectorUnaryZx(AILEmitterCtx Context, OpCode ILOp)
-        {
-            EmitVectorUnaryZx(Context, () => Context.Emit(ILOp));
-        }
-
-        private static void EmitVectorBinaryZx(AILEmitterCtx Context, OpCode ILOp)
-        {
-            EmitVectorBinaryZx(Context, () => Context.Emit(ILOp));
-        }
-
         private static void EmitVectorUnarySx(AILEmitterCtx Context, Action Emit)
         {
             EmitVectorOp(Context, Emit, OperFlags.Rn, true);
@@ -974,18 +1037,6 @@ namespace ChocolArm64.Instruction
             EmitVectorOp(Context, Emit, OperFlags.RdRnRm, false);
         }
 
-        [Flags]
-        private enum OperFlags
-        {
-            Rd = 1 << 0,
-            Rn = 1 << 1,
-            Rm = 1 << 2,
-
-            RnRm   = Rn | Rm,
-            RdRn   = Rd | Rn,
-            RdRnRm = Rd | Rn | Rm
-        }
-
         private static void EmitVectorOp(AILEmitterCtx Context, Action Emit, OperFlags Opers, bool Signed)
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
@@ -1020,27 +1071,53 @@ namespace ChocolArm64.Instruction
             }
         }
 
-        private static void EmitVectorImmBinarySx(AILEmitterCtx Context, OpCode ILOp, int Imm)
+        private static void EmitVectorImmUnary(AILEmitterCtx Context, Action Emit)
         {
-            EmitVectorImmBinarySx(Context, () => Context.Emit(ILOp), Imm);
+            EmitVectorImmOp(Context, Emit, false);
         }
 
-        private static void EmitVectorImmBinaryZx(AILEmitterCtx Context, OpCode ILOp, int Imm)
+        private static void EmitVectorImmBinary(AILEmitterCtx Context, Action Emit)
         {
-            EmitVectorImmBinaryZx(Context, () => Context.Emit(ILOp), Imm);
+            EmitVectorImmOp(Context, Emit, true);
         }
 
-        private static void EmitVectorImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm)
+        private static void EmitVectorImmOp(AILEmitterCtx Context, Action Emit, bool Binary)
         {
-            EmitVectorImmBinaryOp(Context, Emit, Imm, true);
+            AOpCodeSimdImm Op = (AOpCodeSimdImm)Context.CurrOp;
+
+            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+
+            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+            {
+                if (Binary)
+                {
+                    EmitVectorExtractZx(Context, Op.Rd, Index, Op.Size);
+                }
+
+                Context.EmitLdc_I8(Op.Imm);
+
+                Emit();
+
+                EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
+            }
+
+            if (Op.RegisterSize == ARegisterSize.SIMD64)
+            {
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
         }
 
-        private static void EmitVectorImmBinaryZx(AILEmitterCtx Context, Action Emit, int Imm)
+        private static void EmitVectorShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm)
         {
-            EmitVectorImmBinaryOp(Context, Emit, Imm, false);
+            EmitVectorShImmBinaryOp(Context, Emit, Imm, true);
         }
 
-        private static void EmitVectorImmBinaryOp(AILEmitterCtx Context, Action Emit, int Imm, bool Signed)
+        private static void EmitVectorShImmBinaryZx(AILEmitterCtx Context, Action Emit, int Imm)
+        {
+            EmitVectorShImmBinaryOp(Context, Emit, Imm, false);
+        }
+
+        private static void EmitVectorShImmBinaryOp(AILEmitterCtx Context, Action Emit, int Imm, bool Signed)
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
@@ -1063,35 +1140,20 @@ namespace ChocolArm64.Instruction
             }
         }
 
-        private static void EmitVectorImmNarrowBinarySx(AILEmitterCtx Context, OpCode ILOp, int Imm)
+        private static void EmitVectorShImmNarrowBinarySx(AILEmitterCtx Context, Action Emit, int Imm)
         {
-            EmitVectorImmNarrowBinarySx(Context, () => Context.Emit(ILOp), Imm);
+            EmitVectorShImmNarrowBinaryOp(Context, Emit, Imm, true);
         }
 
-        private static void EmitVectorImmNarrowBinaryZx(AILEmitterCtx Context, OpCode ILOp, int Imm)
+        private static void EmitVectorShImmNarrowBinaryZx(AILEmitterCtx Context, Action Emit, int Imm)
         {
-            EmitVectorImmNarrowBinaryZx(Context, () => Context.Emit(ILOp), Imm);
+            EmitVectorShImmNarrowBinaryOp(Context, Emit, Imm, false);
         }
 
-        private static void EmitVectorImmNarrowBinarySx(AILEmitterCtx Context, Action Emit, int Imm)
-        {
-            EmitVectorImmNarrowBinaryOp(Context, Emit, Imm, true);
-        }
-
-        private static void EmitVectorImmNarrowBinaryZx(AILEmitterCtx Context, Action Emit, int Imm)
-        {
-            EmitVectorImmNarrowBinaryOp(Context, Emit, Imm, false);
-        }
-
-        private static void EmitVectorImmNarrowBinaryOp(AILEmitterCtx Context, Action Emit, int Imm, bool Signed)
+        private static void EmitVectorShImmNarrowBinaryOp(AILEmitterCtx Context, Action Emit, int Imm, bool Signed)
         {
             AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            if (Op.Size < 0 || Op.Size > 2)
-            {
-                throw new InvalidOperationException(Op.Size.ToString());
-            }
-
             int Elems = 8 >> Op.Size;
 
             int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
@@ -1113,47 +1175,69 @@ namespace ChocolArm64.Instruction
             }
         }
 
-        private static void EmitVectorCmp(AILEmitterCtx Context, OpCode ILOp)
+        private static void EmitVectorShImmWidenBinarySx(AILEmitterCtx Context, Action Emit, int Imm)
         {
-            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+            EmitVectorShImmWidenBinaryOp(Context, Emit, Imm, true);
+        }
 
-            int Bytes = Context.CurrOp.GetBitsCount() >> 3;
+        private static void EmitVectorShImmWidenBinaryZx(AILEmitterCtx Context, Action Emit, int Imm)
+        {
+            EmitVectorShImmWidenBinaryOp(Context, Emit, Imm, false);
+        }
 
-            ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size));
+        private static void EmitVectorShImmWidenBinaryOp(AILEmitterCtx Context, Action Emit, int Imm, bool Signed)
+        {
+            AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
 
-            for (int Index = 0; Index < (Bytes >> Op.Size); Index++)
+            int Elems = 8 >> Op.Size;
+
+            int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
+
+            for (int Index = 0; Index < Elems; Index++)
             {
-                EmitVectorExtractSx(Context, Op.Rn, Index, Op.Size);
+                EmitVectorExtract(Context, Op.Rn, Part + Index, Op.Size, Signed);
 
-                if (Op is AOpCodeSimdReg BinOp)
-                {
-                    EmitVectorExtractSx(Context, BinOp.Rm, Index, Op.Size);
-                }
-                else
-                {
-                    Context.EmitLdc_I8(0);
-                }
+                Context.EmitLdc_I4(Imm);
 
-                AILLabel LblTrue = new AILLabel();
-                AILLabel LblEnd  = new AILLabel();
+                Emit();
 
-                Context.Emit(ILOp, LblTrue);
-
-                EmitVectorInsert(Context, Op.Rd, Index, Op.Size, 0);
-
-                Context.Emit(OpCodes.Br_S, LblEnd);
-
-                Context.MarkLabel(LblTrue);
-
-                EmitVectorInsert(Context, Op.Rd, Index, Op.Size, (long)SzMask);
-
-                Context.MarkLabel(LblEnd);
+                EmitVectorInsertTmp(Context, Index, Op.Size + 1);
             }
 
-            if (Op.RegisterSize == ARegisterSize.SIMD64)
+            Context.EmitLdvectmp();
+            Context.EmitStvec(Op.Rd);
+        }
+
+        private static void EmitVectorWidenBinarySx(AILEmitterCtx Context, Action Emit)
+        {
+            EmitVectorWidenBinary(Context, Emit, true);
+        }
+
+        private static void EmitVectorWidenBinaryZx(AILEmitterCtx Context, Action Emit)
+        {
+            EmitVectorWidenBinary(Context, Emit, false);
+        }
+
+        private static void EmitVectorWidenBinary(AILEmitterCtx Context, Action Emit, bool Signed)
+        {
+            AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
+
+            int Elems = 8 >> Op.Size;
+
+            int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
+
+            for (int Index = 0; Index < Elems; Index++)
             {
-                EmitVectorZeroUpper(Context, Op.Rd);
+                EmitVectorExtract(Context, Op.Rn,        Index, Op.Size + 1, Signed);
+                EmitVectorExtract(Context, Op.Rm, Part + Index, Op.Size,     Signed);
+
+                Emit();
+
+                EmitVectorInsertTmp(Context, Index, Op.Size + 1);
             }
+
+            Context.EmitLdvectmp();
+            Context.EmitStvec(Op.Rd);
         }
 
         private static void EmitVectorExtractF(AILEmitterCtx Context, int Reg, int Index, int Size)
@@ -1234,6 +1318,22 @@ namespace ChocolArm64.Instruction
             Context.EmitStvec(Reg);
         }
 
+        private static void EmitVectorInsertTmp(AILEmitterCtx Context, int Index, int Size)
+        {
+            if (Size < 0 || Size > 3)
+            {
+                throw new ArgumentOutOfRangeException(nameof(Size));
+            }
+
+            Context.EmitLdvectmp();
+            Context.EmitLdc_I4(Index);
+            Context.EmitLdc_I4(Size);
+
+            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt));
+
+            Context.EmitStvectmp();
+        }
+
         private static void EmitVectorInsert(AILEmitterCtx Context, int Reg, int Index, int Size)
         {
             if (Size < 0 || Size > 3)
diff --git a/Ryujinx/Cpu/Instruction/ASoftFallback.cs b/Ryujinx/Cpu/Instruction/ASoftFallback.cs
index 2bd25793b..a8df01070 100644
--- a/Ryujinx/Cpu/Instruction/ASoftFallback.cs
+++ b/Ryujinx/Cpu/Instruction/ASoftFallback.cs
@@ -279,106 +279,7 @@ namespace ChocolArm64.Instruction
             return InsertVec(new AVec(), 0, Size, Low + High);
         }
 
-        public static AVec Addp64(AVec LHS, AVec RHS, int Size)
-        {
-            return Addp(LHS, RHS, Size, 8);
-        }
-
-        public static AVec Addp128(AVec LHS, AVec RHS, int Size)
-        {
-            return Addp(LHS, RHS, Size, 16);
-        }
-
-        private static AVec Addp(AVec LHS, AVec RHS, int Size, int Bytes)
-        {
-            AVec Res = new AVec();
-
-            int Elems = Bytes >> Size;
-            int Half  = Elems >> 1;
-
-            for (int Index = 0; Index < Elems; Index++)
-            {
-                int Elem = (Index & (Half - 1)) << 1;
-
-                ulong L = Index < Half
-                    ? ExtractVec(LHS, Elem + 0, Size)
-                    : ExtractVec(RHS, Elem + 0, Size);
-                
-                ulong R = Index < Half
-                    ? ExtractVec(LHS, Elem + 1, Size)
-                    : ExtractVec(RHS, Elem + 1, Size);
-
-                Res = InsertVec(Res, Index, Size, L + R);
-            }
-
-            return Res;
-        }
-
-        public static AVec Bic_Vi64(AVec Res, ulong Imm, int Size)
-        {
-            return Bic_Vi(Res, Imm, Size, 8);
-        }
-
-        public static AVec Bic_Vi128(AVec Res, ulong Imm, int Size)
-        {
-            return Bic_Vi(Res, Imm, Size, 16);
-        }
-
-        private static AVec Bic_Vi(AVec Res, ulong Imm, int Size, int Bytes)
-        {
-            int Elems = Bytes >> Size;
-
-            for (int Index = 0; Index < Elems; Index++)
-            {
-                ulong Value = ExtractVec(Res, Index, Size);
-
-                Res = InsertVec(Res, Index, Size, Value & ~Imm);
-            }
-
-            return Res;
-        }
-
-        public static AVec Cnt64(AVec Vector)
-        {
-            AVec Res = new AVec();
-
-            Res.B0 = (byte)CountSetBits8(Vector.B0);
-            Res.B1 = (byte)CountSetBits8(Vector.B1);
-            Res.B2 = (byte)CountSetBits8(Vector.B2);
-            Res.B3 = (byte)CountSetBits8(Vector.B3);
-            Res.B4 = (byte)CountSetBits8(Vector.B4);
-            Res.B5 = (byte)CountSetBits8(Vector.B5);
-            Res.B6 = (byte)CountSetBits8(Vector.B6);
-            Res.B7 = (byte)CountSetBits8(Vector.B7);
-
-            return Res;
-        }
-
-        public static AVec Cnt128(AVec Vector)
-        {
-            AVec Res = new AVec();
-
-            Res.B0  = (byte)CountSetBits8(Vector.B0);
-            Res.B1  = (byte)CountSetBits8(Vector.B1);
-            Res.B2  = (byte)CountSetBits8(Vector.B2);
-            Res.B3  = (byte)CountSetBits8(Vector.B3);
-            Res.B4  = (byte)CountSetBits8(Vector.B4);
-            Res.B5  = (byte)CountSetBits8(Vector.B5);
-            Res.B6  = (byte)CountSetBits8(Vector.B6);
-            Res.B7  = (byte)CountSetBits8(Vector.B7);
-            Res.B8  = (byte)CountSetBits8(Vector.B8);
-            Res.B9  = (byte)CountSetBits8(Vector.B9);
-            Res.B10 = (byte)CountSetBits8(Vector.B10);
-            Res.B11 = (byte)CountSetBits8(Vector.B11);
-            Res.B12 = (byte)CountSetBits8(Vector.B12);
-            Res.B13 = (byte)CountSetBits8(Vector.B13);
-            Res.B14 = (byte)CountSetBits8(Vector.B14);
-            Res.B15 = (byte)CountSetBits8(Vector.B15);
-
-            return Res;
-        }
-
-        private static int CountSetBits8(byte Value)
+        public static int CountSetBits8(byte Value)
         {
             return (Value >> 0) & 1 + (Value >> 1) & 1 +
                    (Value >> 2) & 1 + (Value >> 3) & 1 +
@@ -413,248 +314,11 @@ namespace ChocolArm64.Instruction
             return InsertVec(new AVec(), 0, Size, ExtractVec(Vector, Elem, Size));
         }
 
-        public static AVec Dup_V64(AVec Vector, int Elem, int Size)
-        {
-            return Dup_V(Vector, Elem, Size, 8);
-        }
-
-        public static AVec Dup_V128(AVec Vector, int Elem, int Size)
-        {
-            return Dup_V(Vector, Elem, Size, 16);
-        }
-
-        private static AVec Dup_V(AVec Vector, int Elem, int Size, int Bytes)
-        {
-            AVec Res = new AVec();
-
-            ulong Value = ExtractVec(Vector, Elem, Size);
-
-            for (Elem = 0; Elem < (Bytes >> Size); Elem++)
-            {
-                Res = InsertVec(Res, Elem, Size, Value);
-            }
-
-            return Res;
-        }
-
-        public static AVec Fmla64(AVec Res, AVec LHS, AVec RHS, int Size)
-        {
-            return Fmla(Res, LHS, RHS, Size, 2);
-        }
-
-        public static AVec Fmla128(AVec Res, AVec LHS, AVec RHS, int Size)
-        {
-            return Fmla(Res, LHS, RHS, Size, 4);
-        }
-
-        private static AVec Fmla(AVec Res, AVec LHS, AVec RHS, int Size, int Bytes)
-        {
-            int Elems = Bytes >> Size;
-
-            if (Size == 0)
-            {
-                for (int Index = 0; Index < Elems; Index++)
-                {
-                    float L      = LHS.ExtractSingle(Index);
-                    float R      = RHS.ExtractSingle(Index);
-                    float Addend = Res.ExtractSingle(Index);
-
-                    Res = AVec.InsertSingle(Res, Index, Addend + L * R);
-                }
-            }
-            else
-            {
-                for (int Index = 0; Index < Elems; Index++)
-                {
-                    double L      = LHS.ExtractDouble(Index);
-                    double R      = RHS.ExtractDouble(Index);
-                    double Addend = Res.ExtractDouble(Index);
-
-                    Res = AVec.InsertDouble(Res, Index, Addend + L * R);
-                }
-            }
-
-            return Res;
-        }
-
-        public static AVec Fmla_Ve64(AVec Res, AVec LHS, AVec RHS, int SIdx, int Size)
-        {
-            return Fmla_Ve(Res, LHS, RHS, SIdx, Size, 2);
-        }
-
-        public static AVec Fmla_Ve128(AVec Res, AVec LHS, AVec RHS, int SIdx, int Size)
-        {
-            return Fmla_Ve(Res, LHS, RHS, SIdx, Size, 4);
-        }
-
-        private static AVec Fmla_Ve(AVec Res, AVec LHS, AVec RHS, int SIdx, int Size, int Bytes)
-        {
-            int Elems = Bytes >> Size;
-
-            if (Size == 0)
-            {
-                float R = RHS.ExtractSingle(SIdx);
-
-                for (int Index = 0; Index < Elems; Index++)
-                {
-                    float L      = LHS.ExtractSingle(Index);
-                    float Addend = Res.ExtractSingle(Index);
-
-                    Res = AVec.InsertSingle(Res, Index, Addend + L * R);
-                }
-            }
-            else
-            {
-                double R = RHS.ExtractDouble(SIdx);
-
-                for (int Index = 0; Index < Elems; Index++)
-                {
-                    double L      = LHS.ExtractDouble(Index);
-                    double Addend = Res.ExtractDouble(Index);
-
-                    Res = AVec.InsertDouble(Res, Index, Addend + L * R);
-                }
-            }
-
-            return Res;
-        }
-
         public static AVec Fmov_S(ulong Value, int Elem, int Size)
         {
             return InsertVec(new AVec(), Elem, Size, Value);
         }
 
-        public static AVec Fmul_Ve64(AVec LHS, AVec RHS, int SIdx, int Size)
-        {
-            return Fmul_Ve(LHS, RHS, SIdx, Size, 2);
-        }
-
-        public static AVec Fmul_Ve128(AVec LHS, AVec RHS, int SIdx, int Size)
-        {
-            return Fmul_Ve(LHS, RHS, SIdx, Size, 4);
-        }
-
-        private static AVec Fmul_Ve(AVec LHS, AVec RHS, int SIdx, int Size, int Bytes)
-        {
-            AVec Res = new AVec();
-
-            int Elems = Bytes >> Size;
-
-            if (Size == 0)
-            {
-                float R = RHS.ExtractSingle(SIdx);
-
-                for (int Index = 0; Index < Elems; Index++)
-                {
-                    float L = LHS.ExtractSingle(Index);
-
-                    Res = AVec.InsertSingle(Res, Index, L * R);
-                }
-            }
-            else
-            {
-                double R = RHS.ExtractDouble(SIdx);
-
-                for (int Index = 0; Index < Elems; Index++)
-                {
-                    double L = LHS.ExtractDouble(Index);
-
-                    Res = AVec.InsertDouble(Res, Index, L * R);
-                }
-            }
-
-            return Res;
-        }
-
-        public static AVec Ins_Gp(AVec Res, ulong Value, int Elem, int Size)
-        {
-            return InsertVec(Res, Elem, Size, Value);
-        }
-
-        public static AVec Ins_V(AVec Res, AVec Value, int Src, int Dst, int Size)
-        {
-            return InsertVec(Res, Dst, Size, ExtractVec(Value, Src, Size));;
-        }
-
-        public static AVec Orr_Vi64(AVec Res, ulong Imm, int Size)
-        {
-            return Orr_Vi(Res, Imm, Size, 8);
-        }
-
-        public static AVec Orr_Vi128(AVec Res, ulong Imm, int Size)
-        {
-            return Orr_Vi(Res, Imm, Size, 16);
-        }
-
-        private static AVec Orr_Vi(AVec Res, ulong Imm, int Size, int Bytes)
-        {
-            int Elems = Bytes >> Size;
-
-            for (int Index = 0; Index < Elems; Index++)
-            {
-                ulong Value = ExtractVec(Res, Index, Size);
-
-                Res = InsertVec(Res, Index, Size, Value | Imm);
-            }
-
-            return Res;
-        }
-
-        public static AVec Saddw(AVec LHS, AVec RHS, int Size)
-        {
-            return Saddw_(LHS, RHS, Size, false);
-        }
-
-        public static AVec Saddw2(AVec LHS, AVec RHS, int Size)
-        {
-            return Saddw_(LHS, RHS, Size, true);
-        }
-
-        private static AVec Saddw_(AVec LHS, AVec RHS, int Size, bool High)
-        {
-            AVec Res = new AVec();
-
-            int Elems = 8 >> Size;
-            int Part  = High ? Elems : 0;
-
-            for (int Index = 0; Index < Elems; Index++)
-            {
-                long L = ExtractSVec(LHS, Index,        Size + 1);
-                long R = ExtractSVec(RHS, Index + Part, Size);
-
-                Res = InsertSVec(Res, Index, Size + 1, L + R);
-            }
-
-            return Res;
-        }
-
-        public static AVec Sshll(AVec Vector, int Shift, int Size)
-        {
-            return Sshll_(Vector, Shift, Size, false);
-        }
-
-        public static AVec Sshll2(AVec Vector, int Shift, int Size)
-        {
-            return Sshll_(Vector, Shift, Size, true);
-        }
-
-        private static AVec Sshll_(AVec Vector, int Shift, int Size, bool High)
-        {
-            AVec Res = new AVec();
-
-            int Elems = 8 >> Size;
-            int Part  = High ? Elems : 0;
-
-            for (int Index = 0; Index < Elems; Index++)
-            {
-                long Value = ExtractSVec(Vector, Index + Part, Size);
-
-                Res = InsertSVec(Res, Index, Size + 1, Value << Shift);
-            }
-
-            return Res;
-        }
-
         public static AVec Tbl1_V64(AVec Vector, AVec Tb0)
         {
             return Tbl(Vector, 8, Tb0);
@@ -720,173 +384,6 @@ namespace ChocolArm64.Instruction
             return Res;
         }
 
-        public static AVec Uaddlv64(AVec Vector, int Size)
-        {
-            return Uaddlv(Vector, Size, 8);
-        }
-
-        public static AVec Uaddlv128(AVec Vector, int Size)
-        {
-            return Uaddlv(Vector, Size, 16);
-        }
-
-        private static AVec Uaddlv(AVec Vector, int Size, int Bytes)
-        {
-            int Elems = Bytes >> Size;
-
-            ulong Sum = 0;
-
-            for (int Index = 0; Index < Elems; Index++)
-            {
-                Sum += ExtractVec(Vector, Index, Size);
-            }
-
-            return InsertVec(new AVec(), 0, 3, Sum);
-        }
-
-        public static AVec Uaddw(AVec LHS, AVec RHS, int Size)
-        {
-            return Uaddw_(LHS, RHS, Size, false);
-        }
-
-        public static AVec Uaddw2(AVec LHS, AVec RHS, int Size)
-        {
-            return Uaddw_(LHS, RHS, Size, true);
-        }
-
-        private static AVec Uaddw_(AVec LHS, AVec RHS, int Size, bool High)
-        {
-            AVec Res = new AVec();
-
-            int Elems = 8 >> Size;
-            int Part  = High ? Elems : 0;
-
-            for (int Index = 0; Index < Elems; Index++)
-            {
-                ulong L = ExtractVec(LHS, Index,        Size + 1);
-                ulong R = ExtractVec(RHS, Index + Part, Size);
-
-                Res = InsertVec(Res, Index, Size + 1, L + R);
-            }
-
-            return Res;
-        }
-
-        public static AVec Ucvtf_V_F(AVec Vector)
-        {
-            return new AVec()
-            {
-                S0 = (uint)Vector.W0,
-                S1 = (uint)Vector.W1,
-                S2 = (uint)Vector.W2,
-                S3 = (uint)Vector.W3
-            };
-        }
-
-        public static AVec Ucvtf_V_D(AVec Vector)
-        {
-            return new AVec()
-            {
-                D0 = (ulong)Vector.X0,
-                D1 = (ulong)Vector.X1
-            };
-        }
-
-        public static AVec Ushll(AVec Vector, int Shift, int Size)
-        {
-            return Ushll_(Vector, Shift, Size, false);
-        }
-
-        public static AVec Ushll2(AVec Vector, int Shift, int Size)
-        {
-            return Ushll_(Vector, Shift, Size, true);
-        }
-
-        private static AVec Ushll_(AVec Vector, int Shift, int Size, bool High)
-        {
-            AVec Res = new AVec();
-
-            int Elems = 8 >> Size;
-            int Part  = High ? Elems : 0;
-
-            for (int Index = 0; Index < Elems; Index++)
-            {
-                ulong Value = ExtractVec(Vector, Index + Part, Size);
-
-                Res = InsertVec(Res, Index, Size + 1, Value << Shift);
-            }
-
-            return Res;
-        }
-
-        public static AVec Uzp1_V64(AVec LHS, AVec RHS, int Size)
-        {
-            return Uzp(LHS, RHS, Size, 0, 8);
-        }
-
-        public static AVec Uzp1_V128(AVec LHS, AVec RHS, int Size)
-        {
-            return Uzp(LHS, RHS, Size, 0, 16);
-        }
-
-        public static AVec Uzp2_V64(AVec LHS, AVec RHS, int Size)
-        {
-            return Uzp(LHS, RHS, Size, 1, 8);
-        }
-
-        public static AVec Uzp2_V128(AVec LHS, AVec RHS, int Size)
-        {
-            return Uzp(LHS, RHS, Size, 1, 16);
-        }
-
-        private static AVec Uzp(AVec LHS, AVec RHS, int Size, int Part, int Bytes)
-        {
-            AVec Res = new AVec();
-
-            int Elems = Bytes >> Size;
-            int Half  = Elems >> 1;
-
-            for (int Index = 0; Index < Elems; Index++)
-            {
-                int Elem = (Index & (Half - 1)) << 1;
-
-                ulong Value = Index < Half
-                    ? ExtractVec(LHS, Elem + Part, Size)
-                    : ExtractVec(RHS, Elem + Part, Size);
- 
-                Res = InsertVec(Res, Index, Size, Value);
-            }
-
-            return Res;
-        }
-
-        public static AVec Xtn(AVec Vector, int Size)
-        {
-            return Xtn_(Vector, Size, false);
-        }
-
-        public static AVec Xtn2(AVec Vector, int Size)
-        {
-            return Xtn_(Vector, Size, true);
-        }
-
-        private static AVec Xtn_(AVec Vector, int Size, bool High)
-        {
-            AVec Res = new AVec();
-
-            int Elems = 8 >> Size;
-            int Part  = High ? Elems : 0;
-
-            for (int Index = 0; Index < Elems; Index++)
-            {
-                ulong Value = ExtractVec(Vector, Index, Size + 1);
-
-                Res = InsertVec(Res, Index + Part, Size, Value);
-            }
-
-            return Res;
-        }
-
         public static ulong ExtractVec(AVec Vector, int Index, int Size)
         {
             switch (Size)
diff --git a/Ryujinx/Cpu/Translation/AILEmitterCtx.cs b/Ryujinx/Cpu/Translation/AILEmitterCtx.cs
index 410308ff8..4c4841c5c 100644
--- a/Ryujinx/Cpu/Translation/AILEmitterCtx.cs
+++ b/Ryujinx/Cpu/Translation/AILEmitterCtx.cs
@@ -37,6 +37,7 @@ namespace ChocolArm64.Translation
         private const int Tmp2Index = -2;
         private const int Tmp3Index = -3;
         private const int Tmp4Index = -4;
+        private const int Tmp5Index = -5;
 
         public AILEmitterCtx(ATranslator Translator, ABlock[] Graph, ABlock Root)
         {
@@ -91,7 +92,7 @@ namespace ChocolArm64.Translation
         }
 
         public bool TryOptEmitSubroutineCall()
-        {           
+        {
             if (!Translator.TryGetCachedSub(CurrOp, out ATranslatedSub Sub))
             {
                 return false;
@@ -343,6 +344,9 @@ namespace ChocolArm64.Translation
         public void EmitLdtmp() => EmitLdint(Tmp1Index);
         public void EmitSttmp() => EmitStint(Tmp1Index);
 
+        public void EmitLdvectmp() => EmitLdvec(Tmp5Index);
+        public void EmitStvectmp() => EmitStvec(Tmp5Index);
+
         public void EmitLdint(int Index) => Ldloc(Index, AIoType.Int);
         public void EmitStint(int Index) => Stloc(Index, AIoType.Int);
 
@@ -427,7 +431,8 @@ namespace ChocolArm64.Translation
                 Size |= 2;
             }
 
-            if (Op is AOpCodeMem || Op is IAOpCodeLit)
+            if ((Op is AOpCodeMem || Op is IAOpCodeLit) &&
+                !(Op is AOpCodeSimdMemMs || Op is AOpCodeSimdMemSs))
             {
                 return Size < 4 ? typeof(ulong) : typeof(AVec);
             }
diff --git a/Ryujinx/OsHle/Mutex.cs b/Ryujinx/OsHle/Mutex.cs
index 60870baa5..f8344b6f2 100644
--- a/Ryujinx/OsHle/Mutex.cs
+++ b/Ryujinx/OsHle/Mutex.cs
@@ -1,7 +1,6 @@
 using ChocolArm64.Memory;
 using Ryujinx.OsHle.Handles;
 using System.Collections.Concurrent;
-using System.Threading;
 
 namespace Ryujinx.OsHle
 {
diff --git a/Ryujinx/OsHle/Objects/Time/ISystemClock.cs b/Ryujinx/OsHle/Objects/Time/ISystemClock.cs
index 6705a5a19..1d9bae3bb 100644
--- a/Ryujinx/OsHle/Objects/Time/ISystemClock.cs
+++ b/Ryujinx/OsHle/Objects/Time/ISystemClock.cs
@@ -28,7 +28,7 @@ namespace Ryujinx.OsHle.Objects.Time
         {
             DateTime CurrentTime = DateTime.Now;
 
-            if (ClockType == SystemClockType.Standard ||
+            if (ClockType == SystemClockType.User ||
                 ClockType == SystemClockType.Network)
             {
                 CurrentTime = CurrentTime.ToUniversalTime();
diff --git a/Ryujinx/OsHle/Objects/Time/SystemClockType.cs b/Ryujinx/OsHle/Objects/Time/SystemClockType.cs
index 052152b70..ad9675aab 100644
--- a/Ryujinx/OsHle/Objects/Time/SystemClockType.cs
+++ b/Ryujinx/OsHle/Objects/Time/SystemClockType.cs
@@ -2,7 +2,7 @@ namespace Ryujinx.OsHle.Objects.Time
 {
     enum SystemClockType
     {
-        Standard,
+        User,
         Network,
         Local
     }
diff --git a/Ryujinx/OsHle/Services/ServiceTime.cs b/Ryujinx/OsHle/Services/ServiceTime.cs
index bcc046ef6..8a32aabc1 100644
--- a/Ryujinx/OsHle/Services/ServiceTime.cs
+++ b/Ryujinx/OsHle/Services/ServiceTime.cs
@@ -8,7 +8,7 @@ namespace Ryujinx.OsHle.Services
     {
         public static long TimeGetStandardUserSystemClock(ServiceCtx Context)
         {
-            MakeObject(Context, new ISystemClock(SystemClockType.Standard));
+            MakeObject(Context, new ISystemClock(SystemClockType.User));
 
             return 0;
         }
diff --git a/Ryujinx/OsHle/Svc/SvcThreadSync.cs b/Ryujinx/OsHle/Svc/SvcThreadSync.cs
index 96681df1c..d133aada0 100644
--- a/Ryujinx/OsHle/Svc/SvcThreadSync.cs
+++ b/Ryujinx/OsHle/Svc/SvcThreadSync.cs
@@ -1,4 +1,3 @@
-using ChocolArm64;
 using ChocolArm64.State;
 using Ryujinx.OsHle.Handles;