From 89ccec197ec9a5db2bb308ef3e9178910d1ab7a8 Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Tue, 10 Mar 2020 02:17:30 -0300
Subject: [PATCH] Implement VMOVL and VORR.I32 AArch32 SIMD instructions (#960)

* Implement VMOVL and VORR.I32 AArch32 SIMD instructions

* Rename <dt> to <size> on test description

* Rename Widen to Long and improve VMOVL implementation a bit
---
 ARMeilleure/Decoders/OpCode32SimdImm.cs       |  6 +--
 ARMeilleure/Decoders/OpCode32SimdLong.cs      | 27 +++++++++++++
 ARMeilleure/Decoders/OpCodeSimdHelper.cs      |  2 +-
 ARMeilleure/Decoders/OpCodeTable.cs           |  6 ++-
 .../Instructions/InstEmitSimdLogical32.cs     | 39 +++++++++++++++++++
 .../Instructions/InstEmitSimdMove32.cs        | 30 ++++++++++++++
 ARMeilleure/Instructions/InstName.cs          |  4 +-
 Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs     | 28 +++++++++++++
 Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs         | 30 ++++++++++++++
 9 files changed, 165 insertions(+), 7 deletions(-)
 create mode 100644 ARMeilleure/Decoders/OpCode32SimdLong.cs

diff --git a/ARMeilleure/Decoders/OpCode32SimdImm.cs b/ARMeilleure/Decoders/OpCode32SimdImm.cs
index 72fca59ca..c6ae7ec59 100644
--- a/ARMeilleure/Decoders/OpCode32SimdImm.cs
+++ b/ARMeilleure/Decoders/OpCode32SimdImm.cs
@@ -1,11 +1,9 @@
 namespace ARMeilleure.Decoders
 {
-    class OpCode32SimdImm : OpCode32, IOpCode32SimdImm
+    class OpCode32SimdImm : OpCode32SimdBase, IOpCode32SimdImm
     {
-        public int Vd { get; private set; }
         public bool Q { get; private set; }
         public long Immediate { get; private set; }
-        public int Size { get; private set; }
         public int Elems => GetBytesCount() >> Size;
 
         public OpCode32SimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
@@ -24,7 +22,7 @@
             imm |= ((uint)opCode >> 12) & 0x70;
             imm |= ((uint)opCode >> 17) & 0x80;
 
-            (Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm, fpBaseSize: 2);
+            (Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm);
 
             RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
 
diff --git a/ARMeilleure/Decoders/OpCode32SimdLong.cs b/ARMeilleure/Decoders/OpCode32SimdLong.cs
new file mode 100644
index 000000000..c4b186832
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32SimdLong.cs
@@ -0,0 +1,27 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32SimdLong : OpCode32SimdBase // Decoded fields of an A32 SIMD "long" opcode (the opcode table maps VMOVL to this type).
+    {
+        public bool U { get; private set; } // Opcode bit 24; InstEmit32.Vmovl zero-extends when set and sign-extends when clear.
+
+        public OpCode32SimdLong(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            int imm3h = (opCode >> 19) & 0x7; // Opcode bits 21:19.
+
+            // The value must be a power of 2, otherwise it is the encoding of another instruction.
+            switch (imm3h)
+            {
+                case 1: Size = 0; break;
+                case 2: Size = 1; break;
+                case 4: Size = 2; break; // No default case: any other imm3h leaves Size unassigned by this constructor.
+            }
+
+            U = ((opCode >> 24) & 0x1) != 0;
+
+            RegisterSize = RegisterSize.Simd64;
+
+            Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); // Opcode bit 22 -> index bit 4, opcode bits 15:12 -> index bits 3:0.
+            Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); // Opcode bit 5 -> index bit 4, opcode bits 3:0 -> index bits 3:0.
+        }
+    }
+}
diff --git a/ARMeilleure/Decoders/OpCodeSimdHelper.cs b/ARMeilleure/Decoders/OpCodeSimdHelper.cs
index 3e5a7f65b..02f74d030 100644
--- a/ARMeilleure/Decoders/OpCodeSimdHelper.cs
+++ b/ARMeilleure/Decoders/OpCodeSimdHelper.cs
@@ -2,7 +2,7 @@
 {
     public static class OpCodeSimdHelper
     {
-        public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm, int fpBaseSize = 0)
+        public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm)
         {
             int modeLow = cMode & 1;
             int modeHigh = cMode >> 1;
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index 7b1ebbc74..eac317065 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -158,7 +158,7 @@ namespace ARMeilleure.Decoders
             SetA64("x0011010110xxxxx000011xxxxxxxxxx", InstName.Sdiv,            InstEmit.Sdiv,            typeof(OpCodeAluBinary));
             SetA64("10011011001xxxxx0xxxxxxxxxxxxxxx", InstName.Smaddl,          InstEmit.Smaddl,          typeof(OpCodeMul));
             SetA64("10011011001xxxxx1xxxxxxxxxxxxxxx", InstName.Smsubl,          InstEmit.Smsubl,          typeof(OpCodeMul));
-            SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smul__,           InstEmit.Smulh,           typeof(OpCodeMul));
+            SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smulh,           InstEmit.Smulh,           typeof(OpCodeMul));
             SetA64("xx001000100xxxxx1xxxxxxxxxxxxxxx", InstName.Stlr,            InstEmit.Stlr,            typeof(OpCodeMemEx));
             SetA64("1x001000001xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxp,           InstEmit.Stlxp,           typeof(OpCodeMemEx));
             SetA64("xx001000000xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxr,           InstEmit.Stlxr,           typeof(OpCodeMemEx));
@@ -829,6 +829,9 @@ namespace ARMeilleure.Decoders
             SetA32("1111001x1x000xxxxxxx11xx0x01xxxx", InstName.Vmov,    InstEmit32.Vmov_I,   typeof(OpCode32SimdImm)); // D/Q (dt - from cmode).
             SetA32("1111001x1x000xxxxxxx11100x11xxxx", InstName.Vmov,    InstEmit32.Vmov_I,   typeof(OpCode32SimdImm)); // D/Q I64.
             SetA32("<<<<11101x110000xxxx101x01x0xxxx", InstName.Vmov,    InstEmit32.Vmov_S,   typeof(OpCode32SimdS));
+            SetA32("1111001x1x001000xxx0101000x1xxxx", InstName.Vmovl,   InstEmit32.Vmovl,    typeof(OpCode32SimdLong));
+            SetA32("1111001x1x010000xxx0101000x1xxxx", InstName.Vmovl,   InstEmit32.Vmovl,    typeof(OpCode32SimdLong));
+            SetA32("1111001x1x100000xxx0101000x1xxxx", InstName.Vmovl,   InstEmit32.Vmovl,    typeof(OpCode32SimdLong));
             SetA32("111100111x11xx10xxxx001000x0xxx0", InstName.Vmovn,   InstEmit32.Vmovn,    typeof(OpCode32SimdCmpZ));
             SetA32("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs,    InstEmit32.Vmrs,     typeof(OpCode32SimdSpecial));
             SetA32("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr,    InstEmit32.Vmsr,     typeof(OpCode32SimdSpecial));
@@ -845,6 +848,7 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<11100x01xxxxxxxx101xx0x0xxxx", InstName.Vnmls,   InstEmit32.Vnmls_S,  typeof(OpCode32SimdRegS));
             SetA32("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul,   InstEmit32.Vnmul_S,  typeof(OpCode32SimdRegS));
             SetA32("111100100x10xxxxxxxx0001xxx1xxxx", InstName.Vorr,    InstEmit32.Vorr_I,   typeof(OpCode32SimdBinary));
+            SetA32("1111001x1x000xxxxxxx0xx10x01xxxx", InstName.Vorr,    InstEmit32.Vorr_II,  typeof(OpCode32SimdImm));
             SetA32("111100100x<<xxxxxxxx1011x0x1xxxx", InstName.Vpadd,   InstEmit32.Vpadd_I,  typeof(OpCode32SimdReg));
             SetA32("111100110x00xxxxxxxx1101x0x0xxxx", InstName.Vpadd,   InstEmit32.Vpadd_V,  typeof(OpCode32SimdReg));
             SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe,  InstEmit32.Vrecpe,   typeof(OpCode32SimdSqrte));
diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
index fef40a17e..3698f3329 100644
--- a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
@@ -2,7 +2,10 @@
 using ARMeilleure.IntermediateRepresentation;
 using ARMeilleure.Translation;
 
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
 using static ARMeilleure.Instructions.InstEmitSimdHelper32;
+using static ARMeilleure.IntermediateRepresentation.OperandHelper;
 
 namespace ARMeilleure.Instructions
 {
@@ -64,6 +67,42 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public static void Vorr_II(ArmEmitterContext context) // Emits VORR (immediate): ORs a replicated immediate into the destination register.
+        {
+            OpCode32SimdImm op = (OpCode32SimdImm)context.CurrOp;
+
+            long immediate = op.Immediate;
+
+            // Replicate fields to fill the 64-bits, if size is < 64-bits.
+            switch (op.Size)
+            {
+                case 0: immediate *= 0x0101010101010101L; break; // One copy per byte.
+                case 1: immediate *= 0x0001000100010001L; break; // One copy per 16-bit field.
+                case 2: immediate *= 0x0000000100000001L; break; // One copy per 32-bit field; NOTE(review): assumes the immediate fits in one element - confirm.
+            }
+
+            Operand imm = Const(immediate);
+            Operand res = GetVecA32(op.Qd); // Current destination vector; lanes not re-inserted below keep their value.
+
+            if (op.Q)
+            {
+                for (int elem = 0; elem < 2; elem++) // Both 64-bit lanes of the vector.
+                {
+                    Operand de = EmitVectorExtractZx(context, op.Qd, elem, 3);
+
+                    res = EmitVectorInsert(context, res, context.BitwiseOr(de, imm), elem, 3);
+                }
+            }
+            else
+            {
+                Operand de = EmitVectorExtractZx(context, op.Qd, op.Vd & 1, 3); // Only the 64-bit lane selected by the low bit of Vd.
+
+                res = EmitVectorInsert(context, res, context.BitwiseOr(de, imm), op.Vd & 1, 3);
+            }
+
+            context.Copy(GetVecA32(op.Qd), res);
+        }
+
         private static void EmitBifBit(ArmEmitterContext context, bool notRm)
         {
             OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
diff --git a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
index 17ff66b0e..f11f9cc59 100644
--- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
@@ -139,6 +139,36 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        public static void Vmovl(ArmEmitterContext context) // Emits VMOVL: copies each source element into a destination lane one size step wider.
+        {
+            OpCode32SimdLong op = (OpCode32SimdLong)context.CurrOp;
+
+            Operand res = context.VectorZero();
+
+            int elems = op.GetBytesCount() >> op.Size; // Source element count; presumably 8 bytes for Simd64 - confirm against GetBytesCount.
+
+            for (int index = 0; index < elems; index++)
+            {
+                Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U); // Last argument is presumably the "signed" flag - confirm.
+
+                if (op.Size == 2) // 32-bit source elements are widened explicitly to I64 before being inserted as 64-bit lanes.
+                {
+                    if (op.U)
+                    {
+                        me = context.ZeroExtend32(OperandType.I64, me);
+                    }
+                    else
+                    {
+                        me = context.SignExtend32(OperandType.I64, me);
+                    }
+                }
+
+                res = EmitVectorInsert(context, res, me, index, op.Size + 1); // Destination lane size is source size + 1.
+            }
+
+            context.Copy(GetVecA32(op.Qd), res);
+        }
+
         public static void Vtbl(ArmEmitterContext context)
         {
             OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index 049c956dc..69969e9ff 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -81,7 +81,7 @@ namespace ARMeilleure.Instructions
         Sdiv,
         Smaddl,
         Smsubl,
-        Smul__,
+        Smulh,
         Smull,
         Smulw_,
         Ssat,
@@ -500,6 +500,7 @@ namespace ARMeilleure.Instructions
         Smlaw_,
         Smmla,
         Smmls,
+        Smul__,
         Smmul,
         Stl,
         Stlb,
@@ -560,6 +561,7 @@ namespace ARMeilleure.Instructions
         Vmla,
         Vmls,
         Vmov,
+        Vmovl,
         Vmovn,
         Vmrs,
         Vmsr,
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs
index dfbd3b0bd..459127de2 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs
@@ -56,6 +56,34 @@ namespace Ryujinx.Tests.Cpu
 
             CompareAgainstUnicorn();
         }
+
+        [Test, Pairwise, Description("VORR.I32 <Vd>, #<imm>")]
+        public void Vorr_II([Range(0u, 4u)] uint rd,
+                            [Random(RndCnt)] ulong z,
+                            [Random(RndCnt)] byte imm,
+                            [Values(0u, 1u, 2u, 3u)] uint cMode,
+                            [Values] bool q)
+        {
+            uint opcode = 0xf2800110u; // VORR.I32 D0, #0
+
+            if (q)
+            {
+                opcode |= 1 << 6; // Opcode bit 6, set only for the q variant.
+                rd <<= 1; // NOTE(review): presumably turns a Q register number into the matching D index - confirm.
+            }
+
+            opcode |= (uint)(imm & 0xf) << 0; // imm bits 3:0 -> opcode bits 3:0.
+            opcode |= (uint)(imm & 0x70) << 12; // imm bits 6:4 -> opcode bits 18:16.
+            opcode |= (uint)(imm & 0x80) << 17; // imm bit 7 -> opcode bit 24.
+            opcode |= (cMode & 0x3) << 9; // cMode bits 1:0 -> opcode bits 10:9.
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); // rd bits 3:0 -> opcode bits 15:12, rd bit 4 -> opcode bit 22.
+
+            V128 v0 = MakeVectorE0E1(z, z);
+
+            SingleOpcode(opcode, v0: v0); // Only v0 is passed here; rd can select other registers.
+
+            CompareAgainstUnicorn();
+        }
 #endif
     }
 }
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
index 13d610788..8c9627ce0 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs
@@ -228,6 +228,36 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Pairwise, Description("VMOVL.<size> <Qd>, <Dm>")]
+        public void Vmovl([Values(0u, 1u, 2u, 3u)] uint vm,
+                          [Values(0u, 2u, 4u, 6u)] uint vd,
+                          [Values(1u, 2u, 4u)] uint imm3H,
+                          [Values] bool u)
+        {
+            // This is not VMOVL because imm3H = 0, but once
+            // we shift in the imm3H value it turns into VMOVL.
+            uint opcode = 0xf2800a10u; // VMOV.I16 D0, #0
+
+            opcode |= (vm & 0x10) << 1; // vm bit 4 -> opcode bit 5; always zero for the vm values listed above.
+            opcode |= (vm & 0xf); // vm bits 3:0 -> opcode bits 3:0.
+            opcode |= (vd & 0x10) << 18; // vd bit 4 -> opcode bit 22; always zero for the vd values listed above.
+            opcode |= (vd & 0xf) << 12; // vd bits 3:0 -> opcode bits 15:12.
+            opcode |= (imm3H & 0x7) << 19; // imm3H -> opcode bits 21:19.
+            if (u)
+            {
+                opcode |= 1 << 24; // Opcode bit 24, set only for the u variant.
+            }
+
+            V128 v0 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v1 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v2 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+            V128 v3 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3);
+
+            CompareAgainstUnicorn();
+        }
+
         [Test, Pairwise, Description("VTRN.<size> <Vd>, <Vm>")]
         public void Vtrn([Values(0u, 1u, 2u, 3u)] uint vm,
                          [Values(0u, 1u, 2u, 3u)] uint vd,