From 7acd0e01226d64d05b2675f6ae07507039a31835 Mon Sep 17 00:00:00 2001
From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>
Date: Sun, 8 Apr 2018 21:08:57 +0200
Subject: [PATCH] Add FMUL (scalar, by element) instruction; add FRECPE, FRECPS
 (scalar & vector) instructions. Add 5 simple tests. (#74)

* Update AOpCodeTable.cs

* Update AInstEmitSimdArithmetic.cs

* Update AInstEmitSimdHelper.cs

* Update CpuTestSimdArithmetic.cs

* Update AOpCodeTable.cs

* Update AInstEmitSimdArithmetic.cs
---
 ChocolArm64/AOpCodeTable.cs                   |   5 +
 .../Instruction/AInstEmitSimdArithmetic.cs    | 111 +++++++++++++++++-
 .../Instruction/AInstEmitSimdHelper.cs        |  28 ++++-
 Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs    |  47 ++++++++
 4 files changed, 189 insertions(+), 2 deletions(-)

diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs
index b323a112..3c1ec4bb 100644
--- a/ChocolArm64/AOpCodeTable.cs
+++ b/ChocolArm64/AOpCodeTable.cs
@@ -224,6 +224,7 @@ namespace ChocolArm64
             Set("1001111010101111000000xxxxxxxxxx", AInstEmit.Fmov_Itof1,    typeof(AOpCodeSimdCvt));
             Set("000111110x0xxxxx1xxxxxxxxxxxxxxx", AInstEmit.Fmsub_S,       typeof(AOpCodeSimdReg));
             Set("000111100x1xxxxx000010xxxxxxxxxx", AInstEmit.Fmul_S,        typeof(AOpCodeSimdReg));
+            Set("010111111<<xxxxx1001x0xxxxxxxxxx", AInstEmit.Fmul_Se,       typeof(AOpCodeSimdRegElemF));
             Set("0>1011100<1xxxxx110111xxxxxxxxxx", AInstEmit.Fmul_V,        typeof(AOpCodeSimdReg));
             Set("0x0011111<<xxxxx1001x0xxxxxxxxxx", AInstEmit.Fmul_Ve,       typeof(AOpCodeSimdRegElemF));
             Set("000111100x100001010000xxxxxxxxxx", AInstEmit.Fneg_S,        typeof(AOpCodeSimd));
@@ -231,6 +232,10 @@ namespace ChocolArm64
             Set("000111110x1xxxxx0xxxxxxxxxxxxxxx", AInstEmit.Fnmadd_S,      typeof(AOpCodeSimdReg));
             Set("000111110x1xxxxx1xxxxxxxxxxxxxxx", AInstEmit.Fnmsub_S,      typeof(AOpCodeSimdReg));
             Set("000111100x1xxxxx100010xxxxxxxxxx", AInstEmit.Fnmul_S,       typeof(AOpCodeSimdReg));
+            Set("010111101x100001110110xxxxxxxxxx", AInstEmit.Frecpe_S,      typeof(AOpCodeSimd));
+            Set("0>0011101<100001110110xxxxxxxxxx", AInstEmit.Frecpe_V,      typeof(AOpCodeSimd));
+            Set("010111100x1xxxxx111111xxxxxxxxxx", AInstEmit.Frecps_S,      typeof(AOpCodeSimdReg));
+            Set("0>0011100<1xxxxx111111xxxxxxxxxx", AInstEmit.Frecps_V,      typeof(AOpCodeSimdReg));
             Set("000111100x100110010000xxxxxxxxxx", AInstEmit.Frinta_S,      typeof(AOpCodeSimd));
             Set("0>1011100<100001100010xxxxxxxxxx", AInstEmit.Frinta_V,      typeof(AOpCodeSimd));
             Set("000111100x100111110000xxxxxxxxxx", AInstEmit.Frinti_S,      typeof(AOpCodeSimd));
diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
index bf119a18..721fd7eb 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
@@ -256,6 +256,11 @@ namespace ChocolArm64.Instruction
             EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Mul));
         }
 
+        // FMUL (scalar, by element): hands an IL Mul emitter to the by-element scalar helper,
+        // which supplies lane 0 of Rn and the opcode-selected lane of Rm as the operands.
+        public static void Fmul_Se(AILEmitterCtx Context)
+            => EmitScalarBinaryOpByElemF(Context, () => Context.Emit(OpCodes.Mul));
+
         public static void Fmul_V(AILEmitterCtx Context)
         {
             EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Mul));
@@ -324,6 +329,110 @@ namespace ChocolArm64.Instruction
             });
         }
 
+        // FRECPE (scalar): one pass of the shared emitter over lane 0; Scalar mode makes the
+        // helper emit EmitVectorZeroAll for Rd before the result lane is written.
+        public static void Frecpe_S(AILEmitterCtx Context)
+            => EmitFrecpe(Context, Index: 0, Scalar: true);
+
+        // FRECPE (vector): runs the shared per-lane emitter once for each lane of the operation.
+        public static void Frecpe_V(AILEmitterCtx Context)
+        {
+            AOpCodeSimd SimdOp = (AOpCodeSimd)Context.CurrOp;
+            // Lanes are 4 bytes wide when the low Size bit is 0 and 8 bytes wide when it is 1.
+            int LaneShift = (SimdOp.Size & 1) + 2;
+            int LaneCount = (Context.CurrOp.GetBitsCount() >> 3) >> LaneShift;
+
+            for (int Lane = 0; Lane < LaneCount; Lane++)
+            {
+                EmitFrecpe(Context, Lane, Scalar: false);
+            }
+
+            if (SimdOp.RegisterSize == ARegisterSize.SIMD64)
+            {
+                EmitVectorZeroUpper(Context, SimdOp.Rd);
+            }
+        }
+
+        // Emits IL that divides 1 by lane Index of Rn and writes the quotient to lane Index of Rd.
+        // With Scalar set, EmitVectorZeroAll is emitted for Rd just ahead of that write.
+        private static void EmitFrecpe(AILEmitterCtx Context, int Index, bool Scalar)
+        {
+            AOpCodeSimd SimdOp = (AOpCodeSimd)Context.CurrOp;
+
+            int FpSize = SimdOp.Size & 1;
+
+            // The dividend is loaded first, as R4 when FpSize is 0 and as R8 otherwise.
+            switch (FpSize)
+            {
+                case 0:  Context.EmitLdc_R4(1); break;
+                default: Context.EmitLdc_R8(1); break;
+            }
+
+            EmitVectorExtractF(Context, SimdOp.Rn, Index, FpSize);
+            Context.Emit(OpCodes.Div);
+
+            // Rd is only touched once the source lane has been read and divided.
+            if (Scalar)
+            {
+                EmitVectorZeroAll(Context, SimdOp.Rd);
+            }
+
+            EmitVectorInsertF(Context, SimdOp.Rd, Index, FpSize);
+        }
+
+        // FRECPS (scalar): one pass of the shared emitter over lane 0; Scalar mode makes the
+        // helper emit EmitVectorZeroAll for Rd before the result lane is written.
+        public static void Frecps_S(AILEmitterCtx Context)
+            => EmitFrecps(Context, Index: 0, Scalar: true);
+
+        // FRECPS (vector): runs the shared per-lane emitter once for each lane of the operation.
+        public static void Frecps_V(AILEmitterCtx Context)
+        {
+            AOpCodeSimd SimdOp = (AOpCodeSimd)Context.CurrOp;
+            // Operation size in bytes divided by the lane size (4 or 8 bytes, per the low Size bit).
+            int LaneShift = (SimdOp.Size & 1) + 2;
+            int LaneCount = (Context.CurrOp.GetBitsCount() >> 3) >> LaneShift;
+
+            for (int Lane = 0; Lane < LaneCount; Lane++)
+            {
+                EmitFrecps(Context, Lane, Scalar: false);
+            }
+
+            if (SimdOp.RegisterSize == ARegisterSize.SIMD64)
+            {
+                EmitVectorZeroUpper(Context, SimdOp.Rd);
+            }
+        }
+
+        // Emits IL computing 2 - (Rn[Index] * Rm[Index]) and writing it to lane Index of Rd.
+        // With Scalar set, EmitVectorZeroAll is emitted for Rd just ahead of that write.
+        private static void EmitFrecps(AILEmitterCtx Context, int Index, bool Scalar)
+        {
+            AOpCodeSimdReg RegOp = (AOpCodeSimdReg)Context.CurrOp;
+
+            int FpSize = RegOp.Size & 1;
+
+            // The constant 2 is loaded first so that the closing Sub yields 2 - product.
+            switch (FpSize)
+            {
+                case 0:  Context.EmitLdc_R4(2); break;
+                default: Context.EmitLdc_R8(2); break;
+            }
+
+            EmitVectorExtractF(Context, RegOp.Rn, Index, FpSize);
+            EmitVectorExtractF(Context, RegOp.Rm, Index, FpSize);
+
+            Context.Emit(OpCodes.Mul); // Rn[Index] * Rm[Index]
+            Context.Emit(OpCodes.Sub); // 2 - product
+
+            if (Scalar)
+            {
+                EmitVectorZeroAll(Context, RegOp.Rd);
+            }
+
+            EmitVectorInsertF(Context, RegOp.Rd, Index, FpSize);
+        }
+
         public static void Frinta_S(AILEmitterCtx Context)
         {
             AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
@@ -745,4 +854,4 @@ namespace ChocolArm64.Instruction
             EmitVectorWidenRnRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Mul));
         }
     }
-}
\ No newline at end of file
+}
diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
index 264919ab..9ef9d02f 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
@@ -120,6 +120,32 @@ namespace ChocolArm64.Instruction
             Context.EmitCall(MthdInfo);
         }
 
+        // Binary scalar FP op "by element": the lane of Rm to use comes from the opcode's Index.
+        public static void EmitScalarBinaryOpByElemF(AILEmitterCtx Context, Action Emit)
+        {
+            int Lane = ((AOpCodeSimdRegElemF)Context.CurrOp).Index;
+            EmitScalarOpByElemF(Context, Emit, Lane, Ternary: false);
+        }
+
+        // Emits a scalar FP op on lane 0 of Rn and lane Elem of Rm (after lane 0 of Rd if Ternary).
+        public static void EmitScalarOpByElemF(AILEmitterCtx Context, Action Emit, int Elem, bool Ternary)
+        {
+            AOpCodeSimdReg RegOp = (AOpCodeSimdReg)Context.CurrOp;
+            int FpSize = RegOp.Size & 1;
+
+            // Operands are extracted in the order [Rd[0],] Rn[0], Rm[Elem] before Emit runs.
+            if (Ternary)
+            {
+                EmitVectorExtractF(Context, RegOp.Rd, 0, FpSize);
+            }
+
+            EmitVectorExtractF(Context, RegOp.Rn, 0,    FpSize);
+            EmitVectorExtractF(Context, RegOp.Rm, Elem, FpSize);
+            Emit();
+
+            EmitScalarSetF(Context, RegOp.Rd, FpSize);
+        }
+
         public static void EmitScalarUnaryOpSx(AILEmitterCtx Context, Action Emit)
         {
             EmitScalarOp(Context, Emit, OperFlags.Rn, true);
@@ -724,4 +750,4 @@ namespace ChocolArm64.Instruction
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
index 7765253b..ba82be31 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
@@ -44,6 +44,53 @@ namespace Ryujinx.Tests.Cpu
                 Assert.AreEqual(Result1, ThreadState.V0.X1);
             });
         }
+
+        [Test, Description("fmul s6, s1, v0.s[2]")]
+        public void Fmul_Se([Random(10)] float A, [Random(10)] float B)
+        {
+            // A goes in lane 0 of V1 and B in lane 2 of V0; the product is read back from lane 0 of V6.
+            AThreadState State = SingleOpcode(0x5F809826, V0: new AVec { S2 = B }, V1: new AVec { S0 = A });
+            Assert.That(State.V6.S0, Is.EqualTo(A * B));
+        }
+
+        [Test, Description("frecpe v2.4s, v0.4s")]
+        public void Frecpe_V([Random(100)] float A)
+        {
+            float Expected = 1 / A; // every lane of V0 carries A, so one expected value covers all lanes
+            AThreadState State = SingleOpcode(0x4EA1D802, V0: new AVec { S0 = A, S1 = A, S2 = A, S3 = A });
+            Assert.That(State.V2.S0, Is.EqualTo(Expected));
+            Assert.That(State.V2.S1, Is.EqualTo(Expected));
+            Assert.That(State.V2.S2, Is.EqualTo(Expected));
+            Assert.That(State.V2.S3, Is.EqualTo(Expected));
+        }
+
+        [Test, Description("frecpe d0, d1")]
+        public void Frecpe_S([Random(100)] double A)
+        {
+            // A is placed in D0 of V1; the reciprocal is read back from D0 of V0.
+            AThreadState State = SingleOpcode(0x5EE1D820, V1: new AVec { D0 = A });
+            Assert.That(State.V0.D0, Is.EqualTo(1 / A));
+        }
+
+        [Test, Description("frecps v4.4s, v2.4s, v0.4s")]
+        public void Frecps_V([Random(10)] float A, [Random(10)] float B)
+        {
+            float Expected = 2 - (A * B); // V2 holds A and V0 holds B in every lane
+            AThreadState State = SingleOpcode(0x4E20FC44, V0: new AVec { S0 = B, S1 = B, S2 = B, S3 = B },
+                                                          V2: new AVec { S0 = A, S1 = A, S2 = A, S3 = A });
+            Assert.That(State.V4.S0, Is.EqualTo(Expected));
+            Assert.That(State.V4.S1, Is.EqualTo(Expected));
+            Assert.That(State.V4.S2, Is.EqualTo(Expected));
+            Assert.That(State.V4.S3, Is.EqualTo(Expected));
+        }
+
+        [Test, Description("frecps d0, d1, d2")]
+        public void Frecps_S([Random(10)] double A, [Random(10)] double B)
+        {
+            // A is D0 of V1 and B is D0 of V2; 2 - A * B is read back from D0 of V0.
+            AThreadState State = SingleOpcode(0x5E62FC20, V2: new AVec { D0 = B }, V1: new AVec { D0 = A });
+            Assert.That(State.V0.D0, Is.EqualTo(2 - (A * B)));
+        }
     
         [TestCase(0x3FE66666u, false, 0x40000000u)]
         [TestCase(0x3F99999Au, false, 0x3F800000u)]