From 10aa11ce13291cf2ea2aeb751838c65c45fdc0ba Mon Sep 17 00:00:00 2001
From: riperiperi <rhy3756547@hotmail.com>
Date: Thu, 17 Dec 2020 18:39:52 +0000
Subject: [PATCH] Interrupt GPU command processing when a frame's fence is
 reached. (#1741)

* Interrupt GPU command processing when a frame's fence is reached.

* Accumulate times rather than %s

* Accurate timer for vsync

Spin wait for the last 0.667ms of a frame. Avoids issues caused by signalling a 16ms vsync (periodic stutters in SMO).

* Use event wait for better timing.

* Fix lazy wait

Windows doesn't seem to honour 1ms waits consistently, so force a spin if less than 2ms remains.

* A bit more efficiency on frame waits.

Should now sometimes wait only the remaining 0.6667ms instead of 1.6667ms (odd waits above 1ms are reliable, unlike 1ms waits).

* Better swap interval 0 solution

737 fps without breaking a sweat. Downside: Vsync can no longer be disabled on games that use the event heavily (Link's Awakening - which is OK since it breaks anyway).

* Fix comment.

* Address Comments.
---
 .../Engine/GPFifo/GPFifoDevice.cs             | 16 ++++-
 Ryujinx.Graphics.Gpu/Window.cs                | 27 ++++++++
 .../SurfaceFlinger/BufferQueueCore.cs         |  7 ++
 .../SurfaceFlinger/BufferQueueProducer.cs     |  2 +
 .../Services/SurfaceFlinger/SurfaceFlinger.cs | 69 +++++++++++++++++--
 .../SurfaceFlinger/Types/AndroidFence.cs      |  7 ++
 Ryujinx.HLE/PerformanceStatistics.cs          | 30 ++++----
 Ryujinx.HLE/Switch.cs                         |  5 ++
 Ryujinx/Ui/GLRenderer.cs                      | 13 ++--
 9 files changed, 146 insertions(+), 30 deletions(-)

diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
index 28430f21..25614a13 100644
--- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs
@@ -66,6 +66,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
         private readonly AutoResetEvent _event;
         private readonly GPFifoProcessor _processor;
 
+        private bool _interrupt;
+
         /// <summary>
         /// Creates a new instance of the GPU General Purpose FIFO device.
         /// </summary>
@@ -163,7 +165,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
         /// <returns>True if commands were received, false if wait timed out</returns>
         public bool WaitForCommands()
         {
-            return _event.WaitOne(8) && !_commandBufferQueue.IsEmpty;
+            return !_commandBufferQueue.IsEmpty || (_event.WaitOne(8) && !_commandBufferQueue.IsEmpty);
         }
 
         /// <summary>
@@ -171,13 +173,23 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
         /// </summary>
         public void DispatchCalls()
         {
-            while (_ibEnable && _commandBufferQueue.TryDequeue(out CommandBuffer entry))
+            while (_ibEnable && !_interrupt && _commandBufferQueue.TryDequeue(out CommandBuffer entry))
             {
                 _currentCommandBuffer = entry;
                 _currentCommandBuffer.Fetch(_context);
 
                 _processor.Process(_currentCommandBuffer.Words);
             }
+
+            _interrupt = false;
+        }
+
+        /// <summary>
+        /// Requests an interrupt of command processing: <see cref="DispatchCalls"/> stops dequeuing command buffers once it sees the flag, and clears it before returning.
+        /// </summary>
+        public void Interrupt()
+        {
+            _interrupt = true; // Checked in the DispatchCalls loop condition, before each dequeue.
+        }
 
         /// <summary>
diff --git a/Ryujinx.Graphics.Gpu/Window.cs b/Ryujinx.Graphics.Gpu/Window.cs
index cf5c01ef..9d269356 100644
--- a/Ryujinx.Graphics.Gpu/Window.cs
+++ b/Ryujinx.Graphics.Gpu/Window.cs
@@ -2,6 +2,7 @@ using Ryujinx.Graphics.GAL;
 using Ryujinx.Graphics.Gpu.Image;
 using System;
 using System.Collections.Concurrent;
+using System.Threading;
 
 namespace Ryujinx.Graphics.Gpu
 {
@@ -69,6 +70,8 @@ namespace Ryujinx.Graphics.Gpu
 
         private readonly ConcurrentQueue<PresentationTexture> _frameQueue;
 
+        private int _framesAvailable;
+
         /// <summary>
         /// Creates a new instance of the GPU presentation window.
         /// </summary>
@@ -157,5 +160,40 @@ namespace Ryujinx.Graphics.Gpu
                 pt.ReleaseCallback(pt.UserObj);
             }
         }
+
+        /// <summary>
+        /// Indicate that a frame on the queue is ready to be acquired.
+        /// </summary>
+        public void SignalFrameReady()
+        {
+            Interlocked.Increment(ref _framesAvailable);
+        }
+
+        /// <summary>
+        /// Determine if any frames are available, and decrement the available count if there are.
+        /// The check and the decrement happen as one compare-and-swap, so concurrent callers
+        /// cannot both claim the same frame or push the count below zero.
+        /// </summary>
+        /// <returns>True if a frame is available, false otherwise</returns>
+        public bool ConsumeFrameAvailable()
+        {
+            int available = Volatile.Read(ref _framesAvailable);
+
+            while (available > 0)
+            {
+                // Only commit the decrement if the count is still the value that was checked.
+                int observed = Interlocked.CompareExchange(ref _framesAvailable, available - 1, available);
+
+                if (observed == available)
+                {
+                    return true;
+                }
+
+                // Another thread changed the count in between; retry against the fresh value.
+                available = observed;
+            }
+
+            return false;
+        }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.HLE/HOS/Services/SurfaceFlinger/BufferQueueCore.cs b/Ryujinx.HLE/HOS/Services/SurfaceFlinger/BufferQueueCore.cs
index 1043dac9..b8fc71f3 100644
--- a/Ryujinx.HLE/HOS/Services/SurfaceFlinger/BufferQueueCore.cs
+++ b/Ryujinx.HLE/HOS/Services/SurfaceFlinger/BufferQueueCore.cs
@@ -46,6 +46,8 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
 
         public const int BufferHistoryArraySize = 8;
 
+        public event Action BufferQueued;
+
         public BufferQueueCore(Switch device, long pid)
         {
             Slots                    = new BufferSlotArray();
@@ -197,6 +199,11 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
             WaitForLock();
         }
 
+        public void SignalQueueEvent()
+        {
+            BufferQueued?.Invoke(); // Raises BufferQueued; the null-conditional makes this a no-op when nothing is subscribed.
+        }
+
         private void WaitForLock()
         {
             if (Active)
diff --git a/Ryujinx.HLE/HOS/Services/SurfaceFlinger/BufferQueueProducer.cs b/Ryujinx.HLE/HOS/Services/SurfaceFlinger/BufferQueueProducer.cs
index 03df04ad..d4227f01 100644
--- a/Ryujinx.HLE/HOS/Services/SurfaceFlinger/BufferQueueProducer.cs
+++ b/Ryujinx.HLE/HOS/Services/SurfaceFlinger/BufferQueueProducer.cs
@@ -486,6 +486,8 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
                 Monitor.PulseAll(_callbackLock);
             }
 
+            Core.SignalQueueEvent();
+
             return Status.Success;
         }
 
diff --git a/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs b/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs
index bdffb499..1fef6860 100644
--- a/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs
+++ b/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs
@@ -25,8 +25,12 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
 
         private Stopwatch _chrono;
 
+        private ManualResetEvent _event = new ManualResetEvent(false);
+        private AutoResetEvent _nextFrameEvent = new AutoResetEvent(true);
         private long _ticks;
         private long _ticksPerFrame;
+        private long _spinTicks;
+        private long _1msTicks;
 
         private int _swapInterval;
 
@@ -61,8 +65,11 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
             };
 
             _chrono = new Stopwatch();
+            _chrono.Start();
 
             _ticks = 0;
+            _spinTicks = Stopwatch.Frequency / 500;
+            _1msTicks = Stopwatch.Frequency / 1000;
 
             UpdateSwapInterval(1);
 
@@ -76,6 +83,7 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
             // If the swap interval is 0, Game VSync is disabled.
             if (_swapInterval == 0)
             {
+                _nextFrameEvent.Set();
                 _ticksPerFrame = 1;
             }
             else
@@ -129,6 +137,11 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
 
                 BufferQueueCore core = BufferQueue.CreateBufferQueue(_device, pid, out BufferQueueProducer producer, out BufferQueueConsumer consumer);
 
+                core.BufferQueued += () =>
+                {
+                    _nextFrameEvent.Set();
+                };
+
                 _layers.Add(layerId, new Layer
                 {
                     ProducerBinderId = HOSBinderDriverServer.RegisterBinderObject(producer),
@@ -189,23 +202,59 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
         {
             _isRunning = true;
 
+            long lastTicks = _chrono.ElapsedTicks;
+
             while (_isRunning)
             {
-                _ticks += _chrono.ElapsedTicks;
+                long ticks = _chrono.ElapsedTicks;
 
-                _chrono.Restart();
-
-                if (_ticks >= _ticksPerFrame)
+                if (_swapInterval == 0)
                 {
                     Compose();
 
                     _device.System?.SignalVsync();
 
-                    _ticks = Math.Min(_ticks - _ticksPerFrame, _ticksPerFrame);
+                    _nextFrameEvent.WaitOne(17);
+                    lastTicks = ticks;
                 }
+                else
+                {
+                    _ticks += ticks - lastTicks;
+                    lastTicks = ticks;
 
-                // Sleep the minimal amount of time to avoid being too expensive.
-                Thread.Sleep(1);
+                    if (_ticks >= _ticksPerFrame)
+                    {
+                        Compose();
+
+                        _device.System?.SignalVsync();
+
+                        // Apply a maximum bound of 3 frames to the tick remainder, in case some event causes Ryujinx to pause for a long time or messes with the timer.
+                        _ticks = Math.Min(_ticks - _ticksPerFrame, _ticksPerFrame * 3);
+                    }
+
+                    // Sleep if possible. If the time until the next frame is too short, spin wait instead.
+                    long diff = _ticksPerFrame - (_ticks + _chrono.ElapsedTicks - ticks);
+                    if (diff > 0)
+                    {
+                        if (diff < _spinTicks)
+                        {
+                            do
+                            {
+                                // SpinWait is a little more HT/SMT friendly than aggressively updating/checking ticks.
+                                // The value of 5 still gives us quite a bit of precision (~0.0003ms variance at worst) while waiting a reasonable amount of time.
+                                Thread.SpinWait(5);
+
+                                ticks = _chrono.ElapsedTicks;
+                                _ticks += ticks - lastTicks;
+                                lastTicks = ticks;
+                            } while (_ticks < _ticksPerFrame);
+                        }
+                        else
+                        {
+                            _event.WaitOne((int)(diff / _1msTicks));
+                        }
+                    }
+                }
             }
         }
 
@@ -299,6 +348,12 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
                 Item  = item,
             };
 
+            item.Fence.RegisterCallback(_device.Gpu, () => 
+            {
+                _device.Gpu.Window.SignalFrameReady();
+                _device.Gpu.GPFifo.Interrupt();
+            });
+
             _device.Gpu.Window.EnqueueFrameThreadSafe(
                 frameBufferAddress,
                 frameBufferWidth,
diff --git a/Ryujinx.HLE/HOS/Services/SurfaceFlinger/Types/AndroidFence.cs b/Ryujinx.HLE/HOS/Services/SurfaceFlinger/Types/AndroidFence.cs
index 7c5c9ba1..62c53f2d 100644
--- a/Ryujinx.HLE/HOS/Services/SurfaceFlinger/Types/AndroidFence.cs
+++ b/Ryujinx.HLE/HOS/Services/SurfaceFlinger/Types/AndroidFence.cs
@@ -66,6 +66,27 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger
             return false;
         }
 
+        /// <summary>
+        /// Registers a callback on the syncpoint of the last fence held by this structure.
+        /// </summary>
+        /// <param name="gpuContext">GPU context whose synchronization object receives the registration</param>
+        /// <param name="callback">Action handed to the syncpoint registration (invoked directly when there are no fences)</param>
+        public void RegisterCallback(GpuContext gpuContext, Action callback)
+        {
+            // With no fences there is nothing to register against, and FenceCount - 1 would be an
+            // invalid index. Invoke the callback directly so the caller is still notified.
+            if (FenceCount <= 0)
+            {
+                callback();
+
+                return;
+            }
+
+            ref NvFence fence = ref NvFences[FenceCount - 1];
+
+            gpuContext.Synchronization.RegisterCallbackOnSyncpoint(fence.Id, fence.Value, callback);
+        }
+
         public uint GetFlattenedSize()
         {
             return (uint)Unsafe.SizeOf<AndroidFence>();
diff --git a/Ryujinx.HLE/PerformanceStatistics.cs b/Ryujinx.HLE/PerformanceStatistics.cs
index be86584a..fdc1e99f 100644
--- a/Ryujinx.HLE/PerformanceStatistics.cs
+++ b/Ryujinx.HLE/PerformanceStatistics.cs
@@ -16,12 +16,12 @@ namespace Ryujinx.HLE
         private double[] _previousFrameTime;
 
         private double[] _averagePercent;
-        private double[] _accumulatedPercent;
+        private double[] _accumulatedActiveTime;
         private double[] _percentLastEndTime;
         private double[] _percentStartTime;
 
-        private long[] _framesRendered;
-        private long[] _percentCount;
+        private long[]   _framesRendered;
+        private double[] _percentTime;
 
         private object[] _frameLock;
         private object[] _percentLock;
@@ -36,13 +36,13 @@ namespace Ryujinx.HLE
             _accumulatedFrameTime = new double[1];
             _previousFrameTime    = new double[1];
 
-            _averagePercent     = new double[1];
-            _accumulatedPercent = new double[1];
-            _percentLastEndTime = new double[1];
-            _percentStartTime   = new double[1];
+            _averagePercent        = new double[1];
+            _accumulatedActiveTime = new double[1];
+            _percentLastEndTime    = new double[1];
+            _percentStartTime      = new double[1];
 
             _framesRendered = new long[1];
-            _percentCount   = new long[1];
+            _percentTime    = new double[1];
 
             _frameLock   = new object[] { new object() };
             _percentLock = new object[] { new object() };
@@ -91,16 +91,16 @@ namespace Ryujinx.HLE
 
             lock (_percentLock[percentType])
             {
-                if (_percentCount[percentType] > 0)
+                if (_percentTime[percentType] > 0)
                 {
-                    percent = _accumulatedPercent[percentType] / _percentCount[percentType];
+                    percent = (_accumulatedActiveTime[percentType] / _percentTime[percentType]) * 100;
                 }
 
                 _averagePercent[percentType] = percent;
 
-                _percentCount[percentType] = 0;
+                _percentTime[percentType] = 0;
 
-                _accumulatedPercent[percentType] = 0;
+                _accumulatedActiveTime[percentType] = 0;
             }
         }
 
@@ -138,13 +138,11 @@ namespace Ryujinx.HLE
             double elapsedTime = currentTime - _percentLastEndTime[percentType];
             double elapsedActiveTime = currentTime - _percentStartTime[percentType];
 
-            double percentActive = (elapsedActiveTime / elapsedTime) * 100;
-
             lock (_percentLock[percentType])
             {
-                _accumulatedPercent[percentType] += percentActive;
+                _accumulatedActiveTime[percentType] += elapsedActiveTime;
 
-                _percentCount[percentType]++;
+                _percentTime[percentType] += elapsedTime;
             }
 
             _percentLastEndTime[percentType] = currentTime;
diff --git a/Ryujinx.HLE/Switch.cs b/Ryujinx.HLE/Switch.cs
index 6014ccff..332727d3 100644
--- a/Ryujinx.HLE/Switch.cs
+++ b/Ryujinx.HLE/Switch.cs
@@ -177,6 +177,11 @@ namespace Ryujinx.HLE
             Gpu.GPFifo.DispatchCalls();
         }
 
+        public bool ConsumeFrameAvailable()
+        {
+            return Gpu.Window.ConsumeFrameAvailable(); // Forwards to the GPU window; the result is returned unchanged.
+        }
+
         public void PresentFrame(Action swapBuffersCallback)
         {
             Gpu.Window.Present(swapBuffersCallback);
diff --git a/Ryujinx/Ui/GLRenderer.cs b/Ryujinx/Ui/GLRenderer.cs
index 3a4dc326..c20cc78b 100644
--- a/Ryujinx/Ui/GLRenderer.cs
+++ b/Ryujinx/Ui/GLRenderer.cs
@@ -404,16 +404,19 @@ namespace Ryujinx.Ui
                     _device.Statistics.RecordFifoEnd();
                 }
 
-                string dockedMode = ConfigurationState.Instance.System.EnableDockedMode ? "Docked" : "Handheld";
-                float scale = Graphics.Gpu.GraphicsConfig.ResScale;
-                if (scale != 1)
+                while (_device.ConsumeFrameAvailable())
                 {
-                    dockedMode += $" ({scale}x)";
+                    _device.PresentFrame(SwapBuffers);
                 }
 
                 if (_ticks >= _ticksPerFrame)
                 {
-                    _device.PresentFrame(SwapBuffers);
+                    string dockedMode = ConfigurationState.Instance.System.EnableDockedMode ? "Docked" : "Handheld";
+                    float scale = Graphics.Gpu.GraphicsConfig.ResScale;
+                    if (scale != 1)
+                    {
+                        dockedMode += $" ({scale}x)";
+                    }
 
                     StatusUpdatedEvent?.Invoke(this, new StatusUpdatedEventArgs(
                         _device.EnableDeviceVsync,