From e25b7c9848b6ec486eb513297b5c536857665c7f Mon Sep 17 00:00:00 2001
From: gdkchan <gab.dark.100@gmail.com>
Date: Thu, 5 Dec 2019 17:34:47 -0300
Subject: [PATCH] Initial support for the guest OpenGL driver (NVIDIA and
 Nouveau)

---
 Ryujinx.Graphics.GAL/Blend/BlendFactor.cs     |  18 ++-
 Ryujinx.Graphics.GAL/Blend/BlendOp.cs         |   8 +-
 Ryujinx.Graphics.GAL/CompareOp.cs             |  11 +-
 Ryujinx.Graphics.GAL/ITexture.cs              |   2 +-
 .../Texture/TextureCreateInfo.cs              |   5 +
 Ryujinx.Graphics.Gpu/Engine/Compute.cs        |   2 +-
 Ryujinx.Graphics.Gpu/Engine/ComputeParams.cs  |   3 +-
 Ryujinx.Graphics.Gpu/Engine/Inline2Memory.cs  |  87 +++++++++++-
 .../Engine/MethodCopyTexture.cs               |  23 ++++
 .../Engine/MethodUniformBufferUpdate.cs       |   6 +
 Ryujinx.Graphics.Gpu/Engine/Methods.cs        |  76 +++++++----
 Ryujinx.Graphics.Gpu/Image/Texture.cs         |  31 ++++-
 .../Image/TextureBindingsManager.cs           |  18 ++-
 Ryujinx.Graphics.Gpu/Image/TextureManager.cs  |  21 ++-
 .../State/BlendStateCommon.cs                 |  16 +++
 Ryujinx.Graphics.Gpu/State/GpuState.cs        |  22 +++
 Ryujinx.Graphics.Gpu/State/MethodOffset.cs    |   5 +
 Ryujinx.Graphics.Gpu/State/SamplerIndex.cs    |   8 ++
 .../Converters/BlendFactorConverter.cs        |  72 +++++++---
 .../Converters/BlendOpConverter.cs            |  20 ++-
 .../Converters/CompareOpConverter.cs          |  35 +++--
 .../Converters/MagFilterConverter.cs          |   2 +-
 .../Converters/MinFilterConverter.cs          |   2 +-
 Ryujinx.Graphics.OpenGL/TextureView.cs        |  61 +++++++--
 Ryujinx.Graphics.Texture/LayoutConverter.cs   | 129 ++++++++++++++++--
 25 files changed, 581 insertions(+), 102 deletions(-)
 create mode 100644 Ryujinx.Graphics.Gpu/State/BlendStateCommon.cs
 create mode 100644 Ryujinx.Graphics.Gpu/State/SamplerIndex.cs

diff --git a/Ryujinx.Graphics.GAL/Blend/BlendFactor.cs b/Ryujinx.Graphics.GAL/Blend/BlendFactor.cs
index 0eda08a7f..f92e5b3a8 100644
--- a/Ryujinx.Graphics.GAL/Blend/BlendFactor.cs
+++ b/Ryujinx.Graphics.GAL/Blend/BlendFactor.cs
@@ -20,6 +20,22 @@ namespace Ryujinx.Graphics.GAL.Blend
         ConstantColor = 0xc001,
         OneMinusConstantColor,
         ConstantAlpha,
-        OneMinusConstantAlpha
+        OneMinusConstantAlpha,
+
+        ZeroGl                  = 0x4000,
+        OneGl                   = 0x4001,
+        SrcColorGl              = 0x4300,
+        OneMinusSrcColorGl      = 0x4301,
+        SrcAlphaGl              = 0x4302,
+        OneMinusSrcAlphaGl      = 0x4303,
+        DstAlphaGl              = 0x4304,
+        OneMinusDstAlphaGl      = 0x4305,
+        DstColorGl              = 0x4306,
+        OneMinusDstColorGl      = 0x4307,
+        SrcAlphaSaturateGl      = 0x4308,
+        Src1ColorGl             = 0xc900,
+        OneMinusSrc1ColorGl     = 0xc901,
+        Src1AlphaGl             = 0xc902,
+        OneMinusSrc1AlphaGl     = 0xc903
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.GAL/Blend/BlendOp.cs b/Ryujinx.Graphics.GAL/Blend/BlendOp.cs
index 51a0062d7..566dafd93 100644
--- a/Ryujinx.Graphics.GAL/Blend/BlendOp.cs
+++ b/Ryujinx.Graphics.GAL/Blend/BlendOp.cs
@@ -6,6 +6,12 @@ namespace Ryujinx.Graphics.GAL.Blend
         Subtract,
         ReverseSubtract,
         Minimum,
-        Maximum
+        Maximum,
+
+        AddGl             = 0x8006,
+        SubtractGl        = 0x8007,
+        ReverseSubtractGl = 0x8008,
+        MinimumGl         = 0x800a,
+        MaximumGl         = 0x800b
     }
 }
diff --git a/Ryujinx.Graphics.GAL/CompareOp.cs b/Ryujinx.Graphics.GAL/CompareOp.cs
index da5d5067c..358ed2b46 100644
--- a/Ryujinx.Graphics.GAL/CompareOp.cs
+++ b/Ryujinx.Graphics.GAL/CompareOp.cs
@@ -9,6 +9,15 @@ namespace Ryujinx.Graphics.GAL
         Greater,
         NotEqual,
         GreaterOrEqual,
-        Always
+        Always,
+
+        NeverGl          = 0x200,
+        LessGl           = 0x201,
+        EqualGl          = 0x202,
+        LessOrEqualGl    = 0x203,
+        GreaterGl        = 0x204,
+        NotEqualGl       = 0x205,
+        GreaterOrEqualGl = 0x206,
+        AlwaysGl         = 0x207,
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.GAL/ITexture.cs b/Ryujinx.Graphics.GAL/ITexture.cs
index f170e3741..592c0482d 100644
--- a/Ryujinx.Graphics.GAL/ITexture.cs
+++ b/Ryujinx.Graphics.GAL/ITexture.cs
@@ -14,7 +14,7 @@ namespace Ryujinx.Graphics.GAL
 
         int GetStorageDebugId();
 
-        byte[] GetData(int face);
+        byte[] GetData();
 
         void SetData(Span<byte> data);
     }
diff --git a/Ryujinx.Graphics.GAL/Texture/TextureCreateInfo.cs b/Ryujinx.Graphics.GAL/Texture/TextureCreateInfo.cs
index ad365f6b1..c6a73d917 100644
--- a/Ryujinx.Graphics.GAL/Texture/TextureCreateInfo.cs
+++ b/Ryujinx.Graphics.GAL/Texture/TextureCreateInfo.cs
@@ -66,6 +66,11 @@ namespace Ryujinx.Graphics.GAL.Texture
             return GetMipStride(level) * GetLevelHeight(level) * GetLevelDepth(level);
         }
 
+        // Size of one 2D slice of a mip level: the row stride from GetMipStride times the level height.
+        // Unlike the size computation right above, the result is not multiplied by GetLevelDepth.
+        public int GetMipSize2D(int level) =>
+            GetMipStride(level) * GetLevelHeight(level);
+
         public int GetMipStride(int level)
         {
             return BitUtils.AlignUp(GetLevelWidth(level) * BytesPerPixel, 4);
diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute.cs b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
index eb7c4f4a1..f0daac678 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Compute.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
@@ -30,7 +30,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
 
             var samplerPool = state.Get<PoolState>(MethodOffset.SamplerPoolState);
 
-            _textureManager.SetComputeSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId);
+            _textureManager.SetComputeSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId, dispatchParams.SamplerIndex);
 
             var texturePool = state.Get<PoolState>(MethodOffset.TexturePoolState);
 
diff --git a/Ryujinx.Graphics.Gpu/Engine/ComputeParams.cs b/Ryujinx.Graphics.Gpu/Engine/ComputeParams.cs
index 03582f050..77e60aa48 100644
--- a/Ryujinx.Graphics.Gpu/Engine/ComputeParams.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/ComputeParams.cs
@@ -1,3 +1,4 @@
+using Ryujinx.Graphics.Gpu.State;
 using System;
 using System.Runtime.InteropServices;
 
@@ -32,7 +33,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
         public int ShaderOffset;
         public int Unknown9;
         public int Unknown10;
-        public int Unknown11;
+        public SamplerIndex SamplerIndex;
         public int GridSizeX;
         public int GridSizeYZ;
         public int Unknown14;
diff --git a/Ryujinx.Graphics.Gpu/Engine/Inline2Memory.cs b/Ryujinx.Graphics.Gpu/Engine/Inline2Memory.cs
index 09de992f8..8d1ebebe1 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Inline2Memory.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Inline2Memory.cs
@@ -1,5 +1,8 @@
+using Ryujinx.Common;
 using Ryujinx.Graphics.Gpu.State;
+using Ryujinx.Graphics.Texture;
 using System;
+using System.Runtime.InteropServices;
 
 namespace Ryujinx.Graphics.Gpu.Engine
 {
@@ -12,6 +15,10 @@ namespace Ryujinx.Graphics.Gpu.Engine
         private int _offset;
         private int _size;
 
+        private bool _finished;
+
+        private int[] _buffer;
+
         public void LaunchDma(GpuState state, int argument)
         {
             _params = state.Get<Inline2MemoryParams>(MethodOffset.I2mParams);
@@ -20,23 +27,91 @@ namespace Ryujinx.Graphics.Gpu.Engine
 
             _offset = 0;
             _size   = _params.LineLengthIn * _params.LineCount;
+
+            int count = BitUtils.DivRoundUp(_size, 4);
+
+            if (_buffer == null || _buffer.Length < count)
+            {
+                _buffer = new int[count];
+            }
+
+            ulong dstBaseAddress = _context.MemoryManager.Translate(_params.DstAddress.Pack());
+
+            _context.Methods.TextureManager.Flush(dstBaseAddress, (ulong)_size);
+
+            _finished = false;
         }
 
         public void LoadInlineData(GpuState state, int argument)
         {
-            if (_isLinear)
+            if (!_finished && _buffer != null && _offset < _buffer.Length) // Drop words when no transfer was launched or the buffer has no room (e.g. zero-sized transfer).
             {
-                for (int shift = 0; shift < 32 && _offset < _size; shift += 8, _offset++)
-                {
-                    ulong gpuVa = _params.DstAddress.Pack() + (ulong)_offset;
+                _buffer[_offset++] = argument; // Each call supplies one 32-bit word of the transfer.
 
-                    _context.MemoryAccessor.Write(gpuVa, new byte[] { (byte)(argument >> shift) });
+                if (_offset * 4 >= _size) // _offset counts words, _size is in bytes.
+                {
+                    FinishTransfer();
                 }
             }
+        }
+
+        private void FinishTransfer() // Writes the buffered inline data to physical memory and marks the transfer as finished.
+        {
+            Span<byte> data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size); // Byte view of the word buffer, trimmed to the transfer size (the buffer can be larger).
+
+            if (_isLinear && _params.LineCount == 1) // A single linear line is written with one contiguous write.
+            {
+                ulong address = _context.MemoryManager.Translate( _params.DstAddress.Pack());
+
+                _context.PhysicalMemory.Write(address, data);
+            }
             else
             {
-                throw new NotImplementedException();
+                var dstCalculator = new OffsetCalculator(
+                    _params.DstWidth,
+                    _params.DstHeight,
+                    _params.DstStride,
+                    _isLinear,
+                    _params.DstMemoryLayout.UnpackGobBlocksInY(),
+                    1);
+
+                int srcOffset = 0; // Read position inside data; advances across all lines.
+
+                ulong dstBaseAddress = _context.MemoryManager.Translate(_params.DstAddress.Pack());
+
+                for (int y = _params.DstY; y < _params.DstY + _params.LineCount; y++)
+                {
+                    int x1      = _params.DstX;
+                    int x2      = _params.DstX + _params.LineLengthIn;
+                    int x2Trunc = _params.DstX + BitUtils.AlignDown(_params.LineLengthIn, 16); // End of the part of the line that is copied in whole 16-byte chunks.
+
+                    int x;
+
+                    for (x = x1; x < x2Trunc; x += 16, srcOffset += 16) // NOTE(review): one GetOffset lookup per 16 bytes; looks like this assumes DstX is 16-byte aligned for non-linear targets - confirm.
+                    {
+                        int dstOffset = dstCalculator.GetOffset(x, y);
+
+                        ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
+
+                        Span<byte> pixel = data.Slice(srcOffset, 16);
+
+                        _context.PhysicalMemory.Write(dstAddress, pixel);
+                    }
+
+                    for (; x < x2; x++, srcOffset++) // Remaining tail of the line, one byte at a time.
+                    {
+                        int dstOffset = dstCalculator.GetOffset(x, y);
+
+                        ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
+
+                        Span<byte> pixel = data.Slice(srcOffset, 1);
+
+                        _context.PhysicalMemory.Write(dstAddress, pixel);
+                    }
+                }
             }
+
+            _finished = true; // LoadInlineData ignores further words until LaunchDma resets this flag.
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodCopyTexture.cs b/Ryujinx.Graphics.Gpu/Engine/MethodCopyTexture.cs
index c482451a3..1b47eac2b 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodCopyTexture.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MethodCopyTexture.cs
@@ -64,6 +64,29 @@ namespace Ryujinx.Graphics.Gpu.Engine
 
             srcTexture.HostTexture.CopyTo(dstTexture.HostTexture, srcRegion, dstRegion, linearFilter);
 
+            // For an out of bounds copy, we must ensure that the copy wraps to the next line.
+            // For example, when copying the X range [32, 96) from a 64x64 texture, 32 pixels of each
+            // line fall outside the bounds of the texture. Those pixels are filled with the first
+            // 32 pixels of the next line of the source texture.
+            // This can be emulated with 2 copies (the first copy handles the region inside the bounds,
+            // the second handles the region outside of the bounds).
+            // We must also extend the source texture by one line to ensure we can wrap on the last line.
+            // This is required by the (guest) OpenGL driver.
+            if (srcRegion.X2 > srcTexture.Info.Width)
+            {
+                srcCopyTexture.Height++;
+
+                srcTexture = _textureManager.FindOrCreateTexture(srcCopyTexture);
+
+                srcRegion = new Extents2D(
+                    srcRegion.X1 - srcTexture.Info.Width,
+                    srcRegion.Y1 + 1,
+                    srcRegion.X2 - srcTexture.Info.Width,
+                    srcRegion.Y2 + 1);
+
+                srcTexture.HostTexture.CopyTo(dstTexture.HostTexture, srcRegion, dstRegion, linearFilter);
+            }
+
             dstTexture.Modified = true;
         }
     }
diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs b/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs
index 43bab2433..12d44f511 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs
@@ -1,3 +1,4 @@
+using Ryujinx.Graphics.Gpu.Memory;
 using Ryujinx.Graphics.Gpu.State;
 
 namespace Ryujinx.Graphics.Gpu.Engine
@@ -8,6 +9,11 @@ namespace Ryujinx.Graphics.Gpu.Engine
         {
             var uniformBuffer = state.Get<UniformBufferState>(MethodOffset.UniformBufferState);
 
+            if (_context.MemoryManager.Translate(uniformBuffer.Address.Pack() + (uint)uniformBuffer.Offset) == MemoryManager.BadAddress)
+            {
+                return;
+            }
+
             _context.MemoryAccessor.Write(uniformBuffer.Address.Pack() + (uint)uniformBuffer.Offset, argument);
 
             state.SetUniformBufferOffset(uniformBuffer.Offset + 4);
diff --git a/Ryujinx.Graphics.Gpu/Engine/Methods.cs b/Ryujinx.Graphics.Gpu/Engine/Methods.cs
index 19e679932..18fd7e708 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Methods.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Methods.cs
@@ -89,8 +89,8 @@ namespace Ryujinx.Graphics.Gpu.Engine
             UpdateRenderTargetStateIfNeeded(state);
 
             if (state.QueryModified(MethodOffset.DepthTestEnable,
-                                             MethodOffset.DepthWriteEnable,
-                                             MethodOffset.DepthTestFunc))
+                                    MethodOffset.DepthWriteEnable,
+                                    MethodOffset.DepthTestFunc))
             {
                 UpdateDepthTestState(state);
             }
@@ -101,16 +101,16 @@ namespace Ryujinx.Graphics.Gpu.Engine
             }
 
             if (state.QueryModified(MethodOffset.DepthBiasState,
-                                             MethodOffset.DepthBiasFactor,
-                                             MethodOffset.DepthBiasUnits,
-                                             MethodOffset.DepthBiasClamp))
+                                    MethodOffset.DepthBiasFactor,
+                                    MethodOffset.DepthBiasUnits,
+                                    MethodOffset.DepthBiasClamp))
             {
                 UpdateDepthBiasState(state);
             }
 
             if (state.QueryModified(MethodOffset.StencilBackMasks,
-                                             MethodOffset.StencilTestState,
-                                             MethodOffset.StencilBackTestState))
+                                    MethodOffset.StencilTestState,
+                                    MethodOffset.StencilBackTestState))
             {
                 UpdateStencilTestState(state);
             }
@@ -143,9 +143,9 @@ namespace Ryujinx.Graphics.Gpu.Engine
             }
 
             if (state.QueryModified(MethodOffset.VertexBufferDrawState,
-                                             MethodOffset.VertexBufferInstanced,
-                                             MethodOffset.VertexBufferState,
-                                             MethodOffset.VertexBufferEndAddress))
+                                    MethodOffset.VertexBufferInstanced,
+                                    MethodOffset.VertexBufferState,
+                                    MethodOffset.VertexBufferEndAddress))
             {
                 UpdateVertexBufferState(state);
             }
@@ -160,7 +160,11 @@ namespace Ryujinx.Graphics.Gpu.Engine
                 UpdateRtColorMask(state);
             }
 
-            if (state.QueryModified(MethodOffset.BlendEnable, MethodOffset.BlendState))
+            if (state.QueryModified(MethodOffset.BlendIndependent,
+                                    MethodOffset.BlendStateCommon,
+                                    MethodOffset.BlendEnableCommon,
+                                    MethodOffset.BlendEnable,
+                                    MethodOffset.BlendState))
             {
                 UpdateBlendState(state);
             }
@@ -288,6 +292,10 @@ namespace Ryujinx.Graphics.Gpu.Engine
 
         private void UpdateViewportTransform(GpuState state)
         {
+            bool flipY = (state.Get<int>(MethodOffset.YControl) & 1) != 0;
+
+            float yFlip = flipY ? -1 : 1;
+
             Viewport[] viewports = new Viewport[Constants.TotalViewports];
 
             for (int index = 0; index < Constants.TotalViewports; index++)
@@ -299,7 +307,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
                 float y = transform.TranslateY - MathF.Abs(transform.ScaleY);
 
                 float width  = transform.ScaleX * 2;
-                float height = transform.ScaleY * 2;
+                float height = transform.ScaleY * 2 * yFlip;
 
                 RectangleF region = new RectangleF(x, y, width, height);
 
@@ -390,7 +398,9 @@ namespace Ryujinx.Graphics.Gpu.Engine
         {
             var samplerPool = state.Get<PoolState>(MethodOffset.SamplerPoolState);
 
-            _textureManager.SetGraphicsSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId);
+            var samplerIndex = state.Get<SamplerIndex>(MethodOffset.SamplerIndex);
+
+            _textureManager.SetGraphicsSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId, samplerIndex);
         }
 
         private void UpdateTexturePoolState(GpuState state)
@@ -548,22 +558,42 @@ namespace Ryujinx.Graphics.Gpu.Engine
 
         private void UpdateBlendState(GpuState state)
         {
+            bool blendIndependent = state.Get<Boolean32>(MethodOffset.BlendIndependent);
+
             BlendState[] blends = new BlendState[8];
 
             for (int index = 0; index < 8; index++)
             {
-                bool enable = state.Get<Boolean32>(MethodOffset.BlendEnable, index);
+                BlendDescriptor descriptor;
 
-                var blend = state.Get<BlendState>(MethodOffset.BlendState, index);
+                if (blendIndependent)
+                {
+                    bool enable = state.Get<Boolean32> (MethodOffset.BlendEnable, index);
+                    var  blend  = state.Get<BlendState>(MethodOffset.BlendState,  index);
 
-                BlendDescriptor descriptor = new BlendDescriptor(
-                    enable,
-                    blend.ColorOp,
-                    blend.ColorSrcFactor,
-                    blend.ColorDstFactor,
-                    blend.AlphaOp,
-                    blend.AlphaSrcFactor,
-                    blend.AlphaDstFactor);
+                    descriptor = new BlendDescriptor(
+                        enable,
+                        blend.ColorOp,
+                        blend.ColorSrcFactor,
+                        blend.ColorDstFactor,
+                        blend.AlphaOp,
+                        blend.AlphaSrcFactor,
+                        blend.AlphaDstFactor);
+                }
+                else
+                {
+                    bool enable = state.Get<Boolean32>       (MethodOffset.BlendEnable, 0);
+                    var  blend  = state.Get<BlendStateCommon>(MethodOffset.BlendStateCommon);
+
+                    descriptor = new BlendDescriptor(
+                        enable,
+                        blend.ColorOp,
+                        blend.ColorSrcFactor,
+                        blend.ColorDstFactor,
+                        blend.AlphaOp,
+                        blend.AlphaSrcFactor,
+                        blend.AlphaDstFactor);
+                }
 
                 _context.Renderer.Pipeline.BindBlendState(index, descriptor);
             }
diff --git a/Ryujinx.Graphics.Gpu/Image/Texture.cs b/Ryujinx.Graphics.Gpu/Image/Texture.cs
index 120abe3fd..5e0575883 100644
--- a/Ryujinx.Graphics.Gpu/Image/Texture.cs
+++ b/Ryujinx.Graphics.Gpu/Image/Texture.cs
@@ -277,7 +277,36 @@ namespace Ryujinx.Graphics.Gpu.Image
 
         public void Flush()
         {
-            byte[] data = HostTexture.GetData(0);
+            Span<byte> data = HostTexture.GetData(); // Contents of the host texture; converted below before being written to memory at Address.
+
+            if (_info.IsLinear) // Pick the LayoutConverter routine that matches the texture layout flag.
+            {
+                data = LayoutConverter.ConvertLinearToLinearStrided( // presumably re-packs rows using _info.Stride - confirm in LayoutConverter.
+                    _info.Width,
+                    _info.Height,
+                    _info.FormatInfo.BlockWidth,
+                    _info.FormatInfo.BlockHeight,
+                    _info.Stride,
+                    _info.FormatInfo.BytesPerPixel,
+                    data);
+            }
+            else
+            {
+                data = LayoutConverter.ConvertLinearToBlockLinear( // Takes all levels, layers and GOB block parameters of the texture.
+                    _info.Width,
+                    _info.Height,
+                    _depth,
+                    _info.Levels,
+                    _layers,
+                    _info.FormatInfo.BlockWidth,
+                    _info.FormatInfo.BlockHeight,
+                    _info.FormatInfo.BytesPerPixel,
+                    _info.GobBlocksInY,
+                    _info.GobBlocksInZ,
+                    _info.GobBlocksInTileX,
+                    _sizeInfo,
+                    data);
+            }
 
             _context.PhysicalMemory.Write(Address, data);
         }
diff --git a/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs b/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs
index 74d7a76a9..290bb665f 100644
--- a/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs
+++ b/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs
@@ -1,4 +1,5 @@
 using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.State;
 using Ryujinx.Graphics.Shader;
 using System;
 
@@ -12,6 +13,8 @@ namespace Ryujinx.Graphics.Gpu.Image
 
         private SamplerPool _samplerPool;
 
+        private SamplerIndex _samplerIndex;
+
         private ulong _texturePoolAddress;
         private int   _texturePoolMaximumId;
 
@@ -67,7 +70,7 @@ namespace Ryujinx.Graphics.Gpu.Image
             _textureBufferIndex = index;
         }
 
-        public void SetSamplerPool(ulong gpuVa, int maximumId)
+        public void SetSamplerPool(ulong gpuVa, int maximumId, SamplerIndex samplerIndex)
         {
             ulong address = _context.MemoryManager.Translate(gpuVa);
 
@@ -82,6 +85,8 @@ namespace Ryujinx.Graphics.Gpu.Image
             }
 
             _samplerPool = new SamplerPool(_context, address, maximumId);
+
+            _samplerIndex = samplerIndex;
         }
 
         public void SetTexturePool(ulong gpuVa, int maximumId)
@@ -131,7 +136,16 @@ namespace Ryujinx.Graphics.Gpu.Image
                 int packedId = ReadPackedId(stageIndex, binding.Handle);
 
                 int textureId = UnpackTextureId(packedId);
-                int samplerId = UnpackSamplerId(packedId);
+                int samplerId;
+
+                if (_samplerIndex == SamplerIndex.ViaHeaderIndex)
+                {
+                    samplerId = textureId;
+                }
+                else
+                {
+                    samplerId = UnpackSamplerId(packedId);
+                }
 
                 Texture texture = pool.Get(textureId);
 
diff --git a/Ryujinx.Graphics.Gpu/Image/TextureManager.cs b/Ryujinx.Graphics.Gpu/Image/TextureManager.cs
index 5e5b1c978..ce0cc249f 100644
--- a/Ryujinx.Graphics.Gpu/Image/TextureManager.cs
+++ b/Ryujinx.Graphics.Gpu/Image/TextureManager.cs
@@ -81,14 +81,14 @@ namespace Ryujinx.Graphics.Gpu.Image
             _gpBindingsManager.SetTextureBufferIndex(index);
         }
 
-        public void SetComputeSamplerPool(ulong gpuVa, int maximumId)
+        public void SetComputeSamplerPool(ulong gpuVa, int maximumId, SamplerIndex samplerIndex)
         {
-            _cpBindingsManager.SetSamplerPool(gpuVa, maximumId);
+            _cpBindingsManager.SetSamplerPool(gpuVa, maximumId, samplerIndex);
         }
 
-        public void SetGraphicsSamplerPool(ulong gpuVa, int maximumId)
+        public void SetGraphicsSamplerPool(ulong gpuVa, int maximumId, SamplerIndex samplerIndex)
         {
-            _gpBindingsManager.SetSamplerPool(gpuVa, maximumId);
+            _gpBindingsManager.SetSamplerPool(gpuVa, maximumId, samplerIndex);
         }
 
         public void SetComputeTexturePool(ulong gpuVa, int maximumId)
@@ -599,6 +599,19 @@ namespace Ryujinx.Graphics.Gpu.Image
             }
         }
 
+        // Flushes every texture in _cache that OverlapsWith reports as overlapping the given
+        // address/size range and that is marked as Modified, then clears its Modified flag.
+        public void Flush(ulong address, ulong size)
+        {
+            foreach (Texture cached in _cache)
+            {
+                if (!cached.OverlapsWith(address, size) || !cached.Modified) { continue; }
+
+                cached.Flush();
+                cached.Modified = false;
+            }
+        }
+
         public void RemoveTextureFromCache(Texture texture)
         {
             _textures.Remove(texture);
diff --git a/Ryujinx.Graphics.Gpu/State/BlendStateCommon.cs b/Ryujinx.Graphics.Gpu/State/BlendStateCommon.cs
new file mode 100644
index 000000000..96465a255
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/State/BlendStateCommon.cs
@@ -0,0 +1,16 @@
+using Ryujinx.Graphics.GAL.Blend;
+
+namespace Ryujinx.Graphics.Gpu.State
+{
+    struct BlendStateCommon // Blend state read from MethodOffset.BlendStateCommon; applied to all 8 render targets when BlendIndependent is false.
+    {
+        public Boolean32   SeparateAlpha; // Not read by the UpdateBlendState code visible in this patch.
+        public BlendOp     ColorOp;
+        public BlendFactor ColorSrcFactor;
+        public BlendFactor ColorDstFactor;
+        public BlendOp     AlphaOp;
+        public BlendFactor AlphaSrcFactor;
+        public uint        Unknown0x1354; // NOTE(review): purpose unknown; presumably keeps AlphaDstFactor at its register position - confirm before reordering fields.
+        public BlendFactor AlphaDstFactor;
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/State/GpuState.cs b/Ryujinx.Graphics.Gpu/State/GpuState.cs
index bf7734dd3..13f777c91 100644
--- a/Ryujinx.Graphics.Gpu/State/GpuState.cs
+++ b/Ryujinx.Graphics.Gpu/State/GpuState.cs
@@ -162,6 +162,28 @@ namespace Ryujinx.Graphics.Gpu.State
             return modified;
         }
 
+        // Returns true if any of the five registers has its Modified flag set; all five flags are cleared.
+        public bool QueryModified(
+            MethodOffset m1,
+            MethodOffset m2,
+            MethodOffset m3,
+            MethodOffset m4,
+            MethodOffset m5)
+        {
+            // Reads the Modified flag of one register and resets it.
+            bool Consume(MethodOffset offset)
+            {
+                bool wasModified = _registers[(int)offset].Modified;
+
+                _registers[(int)offset].Modified = false;
+
+                return wasModified;
+            }
+
+            // Non-short-circuiting | so that every flag is cleared even after one is found set.
+            return Consume(m1) | Consume(m2) | Consume(m3) | Consume(m4) | Consume(m5);
+        }
+
         public T Get<T>(MethodOffset offset, int index) where T : struct
         {
             Register register = _registers[(int)offset];
diff --git a/Ryujinx.Graphics.Gpu/State/MethodOffset.cs b/Ryujinx.Graphics.Gpu/State/MethodOffset.cs
index 4c5e0beb2..a560c257c 100644
--- a/Ryujinx.Graphics.Gpu/State/MethodOffset.cs
+++ b/Ryujinx.Graphics.Gpu/State/MethodOffset.cs
@@ -32,11 +32,16 @@ namespace Ryujinx.Graphics.Gpu.State
         RtDepthStencilState             = 0x3f8,
         VertexAttribState               = 0x458,
         RtDepthStencilSize              = 0x48a,
+        SamplerIndex                    = 0x48d,
         DepthTestEnable                 = 0x4b3,
+        BlendIndependent                = 0x4b9,
         DepthWriteEnable                = 0x4ba,
         DepthTestFunc                   = 0x4c3,
+        BlendStateCommon                = 0x4cf,
+        BlendEnableCommon               = 0x4d7,
         BlendEnable                     = 0x4d8,
         StencilTestState                = 0x4e0,
+        YControl                        = 0x4eb,
         FirstVertex                     = 0x50d,
         FirstInstance                   = 0x50e,
         ResetCounter                    = 0x54c,
diff --git a/Ryujinx.Graphics.Gpu/State/SamplerIndex.cs b/Ryujinx.Graphics.Gpu/State/SamplerIndex.cs
new file mode 100644
index 000000000..651983f1e
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/State/SamplerIndex.cs
@@ -0,0 +1,8 @@
+namespace Ryujinx.Graphics.Gpu.State
+{
+    enum SamplerIndex // Selects how TextureBindingsManager obtains the sampler ID of a texture binding.
+    {
+        Independently  = 0, // The sampler ID is unpacked from the packed handle, separately from the texture ID.
+        ViaHeaderIndex = 1 // The sampler ID is the same as the texture ID.
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.OpenGL/Converters/BlendFactorConverter.cs b/Ryujinx.Graphics.OpenGL/Converters/BlendFactorConverter.cs
index 1dc405226..db5099a72 100644
--- a/Ryujinx.Graphics.OpenGL/Converters/BlendFactorConverter.cs
+++ b/Ryujinx.Graphics.OpenGL/Converters/BlendFactorConverter.cs
@@ -10,25 +10,59 @@ namespace Ryujinx.Graphics.OpenGL
         {
             switch (factor)
             {
-                case BlendFactor.Zero:                  return All.Zero;
-                case BlendFactor.One:                   return All.One;
-                case BlendFactor.SrcColor:              return All.SrcColor;
-                case BlendFactor.OneMinusSrcColor:      return All.OneMinusSrcColor;
-                case BlendFactor.SrcAlpha:              return All.SrcAlpha;
-                case BlendFactor.OneMinusSrcAlpha:      return All.OneMinusSrcAlpha;
-                case BlendFactor.DstAlpha:              return All.DstAlpha;
-                case BlendFactor.OneMinusDstAlpha:      return All.OneMinusDstAlpha;
-                case BlendFactor.DstColor:              return All.DstColor;
-                case BlendFactor.OneMinusDstColor:      return All.OneMinusDstColor;
-                case BlendFactor.SrcAlphaSaturate:      return All.SrcAlphaSaturate;
-                case BlendFactor.Src1Color:             return All.Src1Color;
-                case BlendFactor.OneMinusSrc1Color:     return All.OneMinusSrc1Color;
-                case BlendFactor.Src1Alpha:             return All.Src1Alpha;
-                case BlendFactor.OneMinusSrc1Alpha:     return All.OneMinusSrc1Alpha;
-                case BlendFactor.ConstantColor:         return All.ConstantColor;
-                case BlendFactor.OneMinusConstantColor: return All.OneMinusConstantColor;
-                case BlendFactor.ConstantAlpha:         return All.ConstantAlpha;
-                case BlendFactor.OneMinusConstantAlpha: return All.OneMinusConstantAlpha;
+                case BlendFactor.Zero:
+                case BlendFactor.ZeroGl:
+                    return All.Zero;
+                case BlendFactor.One:
+                case BlendFactor.OneGl:
+                    return All.One;
+                case BlendFactor.SrcColor:
+                case BlendFactor.SrcColorGl:
+                    return All.SrcColor;
+                case BlendFactor.OneMinusSrcColor:
+                case BlendFactor.OneMinusSrcColorGl:
+                    return All.OneMinusSrcColor;
+                case BlendFactor.SrcAlpha:
+                case BlendFactor.SrcAlphaGl:
+                    return All.SrcAlpha;
+                case BlendFactor.OneMinusSrcAlpha:
+                case BlendFactor.OneMinusSrcAlphaGl:
+                    return All.OneMinusSrcAlpha;
+                case BlendFactor.DstAlpha:
+                case BlendFactor.DstAlphaGl:
+                    return All.DstAlpha;
+                case BlendFactor.OneMinusDstAlpha:
+                case BlendFactor.OneMinusDstAlphaGl:
+                    return All.OneMinusDstAlpha;
+                case BlendFactor.DstColor:
+                case BlendFactor.DstColorGl:
+                    return All.DstColor;
+                case BlendFactor.OneMinusDstColor:
+                case BlendFactor.OneMinusDstColorGl:
+                    return All.OneMinusDstColor;
+                case BlendFactor.SrcAlphaSaturate:
+                case BlendFactor.SrcAlphaSaturateGl:
+                    return All.SrcAlphaSaturate;
+                case BlendFactor.Src1Color:
+                case BlendFactor.Src1ColorGl:
+                    return All.Src1Color;
+                case BlendFactor.OneMinusSrc1Color:
+                case BlendFactor.OneMinusSrc1ColorGl:
+                    return All.OneMinusSrc1Color;
+                case BlendFactor.Src1Alpha:
+                case BlendFactor.Src1AlphaGl:
+                    return All.Src1Alpha;
+                case BlendFactor.OneMinusSrc1Alpha:
+                case BlendFactor.OneMinusSrc1AlphaGl:
+                    return All.OneMinusSrc1Alpha;
+                case BlendFactor.ConstantColor:
+                    return All.ConstantColor;
+                case BlendFactor.OneMinusConstantColor:
+                    return All.OneMinusConstantColor;
+                case BlendFactor.ConstantAlpha:
+                    return All.ConstantAlpha;
+                case BlendFactor.OneMinusConstantAlpha:
+                    return All.OneMinusConstantAlpha;
             }
 
             return All.Zero;
diff --git a/Ryujinx.Graphics.OpenGL/Converters/BlendOpConverter.cs b/Ryujinx.Graphics.OpenGL/Converters/BlendOpConverter.cs
index b33a3bf89..66d6a1459 100644
--- a/Ryujinx.Graphics.OpenGL/Converters/BlendOpConverter.cs
+++ b/Ryujinx.Graphics.OpenGL/Converters/BlendOpConverter.cs
@@ -10,11 +10,21 @@ namespace Ryujinx.Graphics.OpenGL
         {
             switch (op)
             {
-                case BlendOp.Add:             return BlendEquationMode.FuncAdd;
-                case BlendOp.Subtract:        return BlendEquationMode.FuncSubtract;
-                case BlendOp.ReverseSubtract: return BlendEquationMode.FuncReverseSubtract;
-                case BlendOp.Minimum:         return BlendEquationMode.Min;
-                case BlendOp.Maximum:         return BlendEquationMode.Max;
+                case BlendOp.Add:
+                case BlendOp.AddGl:
+                    return BlendEquationMode.FuncAdd;
+                case BlendOp.Subtract:
+                case BlendOp.SubtractGl:
+                    return BlendEquationMode.FuncSubtract;
+                case BlendOp.ReverseSubtract:
+                case BlendOp.ReverseSubtractGl:
+                    return BlendEquationMode.FuncReverseSubtract;
+                case BlendOp.Minimum:
+                case BlendOp.MinimumGl:
+                    return BlendEquationMode.Min;
+                case BlendOp.Maximum:
+                case BlendOp.MaximumGl:
+                    return BlendEquationMode.Max;
             }
 
             return BlendEquationMode.FuncAdd;
diff --git a/Ryujinx.Graphics.OpenGL/Converters/CompareOpConverter.cs b/Ryujinx.Graphics.OpenGL/Converters/CompareOpConverter.cs
index f592735bd..a30ca02ea 100644
--- a/Ryujinx.Graphics.OpenGL/Converters/CompareOpConverter.cs
+++ b/Ryujinx.Graphics.OpenGL/Converters/CompareOpConverter.cs
@@ -1,6 +1,5 @@
 using OpenTK.Graphics.OpenGL;
 using Ryujinx.Graphics.GAL;
-using System;
 
 namespace Ryujinx.Graphics.OpenGL
 {
@@ -10,19 +9,33 @@ namespace Ryujinx.Graphics.OpenGL
         {
             switch (op)
             {
-                case CompareOp.Never:          return All.Never;
-                case CompareOp.Less:           return All.Less;
-                case CompareOp.Equal:          return All.Equal;
-                case CompareOp.LessOrEqual:    return All.Lequal;
-                case CompareOp.Greater:        return All.Greater;
-                case CompareOp.NotEqual:       return All.Notequal;
-                case CompareOp.GreaterOrEqual: return All.Gequal;
-                case CompareOp.Always:         return All.Always;
+                case CompareOp.Never:
+                case CompareOp.NeverGl:
+                    return All.Never;
+                case CompareOp.Less:
+                case CompareOp.LessGl:
+                    return All.Less;
+                case CompareOp.Equal:
+                case CompareOp.EqualGl:
+                    return All.Equal;
+                case CompareOp.LessOrEqual:
+                case CompareOp.LessOrEqualGl:
+                    return All.Lequal;
+                case CompareOp.Greater:
+                case CompareOp.GreaterGl:
+                    return All.Greater;
+                case CompareOp.NotEqual:
+                case CompareOp.NotEqualGl:
+                    return All.Notequal;
+                case CompareOp.GreaterOrEqual:
+                case CompareOp.GreaterOrEqualGl:
+                    return All.Gequal;
+                case CompareOp.Always:
+                case CompareOp.AlwaysGl:
+                    return All.Always;
             }
 
             return All.Never;
-
-            throw new ArgumentException($"Invalid compare operation \"{op}\".");
         }
     }
 }
diff --git a/Ryujinx.Graphics.OpenGL/Converters/MagFilterConverter.cs b/Ryujinx.Graphics.OpenGL/Converters/MagFilterConverter.cs
index d86d8014a..cb75ee894 100644
--- a/Ryujinx.Graphics.OpenGL/Converters/MagFilterConverter.cs
+++ b/Ryujinx.Graphics.OpenGL/Converters/MagFilterConverter.cs
@@ -14,7 +14,7 @@ namespace Ryujinx.Graphics.OpenGL
                 case MagFilter.Linear:  return TextureMagFilter.Linear;
             }
 
-            throw new ArgumentException($"Invalid filter \"{filter}\".");
+            return TextureMagFilter.Nearest;
         }
     }
 }
diff --git a/Ryujinx.Graphics.OpenGL/Converters/MinFilterConverter.cs b/Ryujinx.Graphics.OpenGL/Converters/MinFilterConverter.cs
index 128577f85..014362998 100644
--- a/Ryujinx.Graphics.OpenGL/Converters/MinFilterConverter.cs
+++ b/Ryujinx.Graphics.OpenGL/Converters/MinFilterConverter.cs
@@ -18,7 +18,7 @@ namespace Ryujinx.Graphics.OpenGL
                 case MinFilter.LinearMipmapLinear:   return TextureMinFilter.LinearMipmapLinear;
             }
 
-            throw new ArgumentException($"Invalid filter \"{filter}\".");
+            return TextureMinFilter.Nearest;
         }
     }
 }
diff --git a/Ryujinx.Graphics.OpenGL/TextureView.cs b/Ryujinx.Graphics.OpenGL/TextureView.cs
index 8fced290d..769f03394 100644
--- a/Ryujinx.Graphics.OpenGL/TextureView.cs
+++ b/Ryujinx.Graphics.OpenGL/TextureView.cs
@@ -165,7 +165,29 @@ namespace Ryujinx.Graphics.OpenGL
             _renderer.TextureCopy.Copy(this, (TextureView)destination, srcRegion, dstRegion, linearFilter);
         }
 
-        public byte[] GetData(int face)
+        public byte[] GetData()
+        {
+            // The buffer must be large enough to hold every mipmap level.
+            int totalSize = 0;
+            for (int mip = 0; mip < _info.Levels; mip++)
+            {
+                totalSize += _info.GetMipSize(mip);
+            }
+
+            byte[] buffer = new byte[totalSize];
+
+            unsafe
+            {
+                // Pin the array so that WriteTo can fill it through a raw pointer.
+                fixed (byte* bufferPtr = buffer)
+                {
+                    WriteTo((IntPtr)bufferPtr);
+                }
+            }
+            return buffer;
+        }
+
+        private void WriteTo(IntPtr ptr)
         {
             TextureTarget target = Target.Convert();
 
@@ -173,28 +195,37 @@ namespace Ryujinx.Graphics.OpenGL
 
             FormatInfo format = FormatTable.GetFormatInfo(_info.Format);
 
-            int depth = _info.GetDepthOrLayers();
+            int faces = 1;
 
             if (target == TextureTarget.TextureCubeMap)
             {
-                target = TextureTarget.TextureCubeMapPositiveX + face;
+                target = TextureTarget.TextureCubeMapPositiveX;
+
+                faces = 6;
             }
 
-            if (format.IsCompressed)
+            for (int level = 0; level < _info.Levels; level++)
             {
-                byte[] data = new byte[_info.Width * _info.Height * depth * 4];
+                for (int face = 0; face < faces; face++)
+                {
+                    int faceOffset = face * _info.GetMipSize2D(level);
 
-                GL.GetTexImage(target, 0, PixelFormat.Rgba, PixelType.UnsignedByte, data);
+                    if (format.IsCompressed)
+                    {
+                        GL.GetCompressedTexImage(target + face, level, ptr + faceOffset);
+                    }
+                    else
+                    {
+                        GL.GetTexImage(
+                            target + face,
+                            level,
+                            format.PixelFormat,
+                            format.PixelType,
+                            ptr + faceOffset);
+                    }
+                }
 
-                return data;
-            }
-            else
-            {
-                byte[] data = new byte[_info.GetMipSize(0)];
-
-                GL.GetTexImage(target, 0, format.PixelFormat, format.PixelType, data);
-
-                return data;
+                ptr += _info.GetMipSize(level);
             }
         }
 
diff --git a/Ryujinx.Graphics.Texture/LayoutConverter.cs b/Ryujinx.Graphics.Texture/LayoutConverter.cs
index ef80144ec..c270b494b 100644
--- a/Ryujinx.Graphics.Texture/LayoutConverter.cs
+++ b/Ryujinx.Graphics.Texture/LayoutConverter.cs
@@ -7,7 +7,7 @@ namespace Ryujinx.Graphics.Texture
 {
     public static class LayoutConverter
     {
-        private const int AlignmentSize = 4;
+        private const int HostStrideAlignment = 4;
 
         public static Span<byte> ConvertBlockLinearToLinear(
             int width,
@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Texture
             int gobBlocksInZ,
             int gobBlocksInTileX,
             SizeInfo sizeInfo,
-            Span<byte> data)
+            ReadOnlySpan<byte> data)
         {
             int outSize = GetTextureSize(
                 width,
@@ -62,7 +62,7 @@ namespace Ryujinx.Graphics.Texture
                     mipGobBlocksInZ >>= 1;
                 }
 
-                int stride   = BitUtils.AlignUp(w * bytesPerPixel, AlignmentSize);
+                int stride   = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
                 int wAligned = BitUtils.AlignUp(w, wAlignment);
 
                 BlockLinearLayout layoutConverter = new BlockLinearLayout(
@@ -104,17 +104,17 @@ namespace Ryujinx.Graphics.Texture
             int blockHeight,
             int stride,
             int bytesPerPixel,
-            Span<byte> data)
+            ReadOnlySpan<byte> data)
         {
-            int outOffs = 0;
-
             int w = BitUtils.DivRoundUp(width,  blockWidth);
             int h = BitUtils.DivRoundUp(height, blockHeight);
 
-            int outStride = BitUtils.AlignUp(w * bytesPerPixel, AlignmentSize);
+            int outStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
 
             Span<byte> output = new byte[h * outStride];
 
+            int outOffs = 0;
+
             for (int y = 0; y < h; y++)
             {
                 for (int x = 0; x < w; x++)
@@ -132,6 +132,119 @@ namespace Ryujinx.Graphics.Texture
             return output;
         }
 
+        /// <summary>
+        /// Converts linear texture data, for all of its levels and layers, to the block linear layout.
+        /// </summary>
+        /// <returns>Block linear data, with a size of <c>sizeInfo.TotalSize</c> bytes</returns>
+        public static Span<byte> ConvertLinearToBlockLinear(
+            int width,
+            int height,
+            int depth,
+            int levels,
+            int layers,
+            int blockWidth,
+            int blockHeight,
+            int bytesPerPixel,
+            int gobBlocksInY,
+            int gobBlocksInZ,
+            int gobBlocksInTileX,
+            SizeInfo sizeInfo,
+            ReadOnlySpan<byte> data)
+        {
+            Span<byte> output = new byte[sizeInfo.TotalSize];
+
+            int wAlignment = gobBlocksInTileX * (GobStride / bytesPerPixel);
+
+            // These are kept across levels, they are only ever halved and never grow back.
+            int mipGobBlocksInY = gobBlocksInY;
+            int mipGobBlocksInZ = gobBlocksInZ;
+
+            // Read position on the linear input, which is consumed sequentially, one row at a time.
+            int inOffs = 0;
+
+            for (int level = 0; level < levels; level++)
+            {
+                int w = BitUtils.DivRoundUp(Math.Max(1, width  >> level), blockWidth);
+                int h = BitUtils.DivRoundUp(Math.Max(1, height >> level), blockHeight);
+                int d = Math.Max(1, depth >> level);
+
+                // Halve the GOB block counts while the level still fits on half of them.
+                while (mipGobBlocksInY != 1 && h <= (mipGobBlocksInY >> 1) * GobHeight)
+                {
+                    mipGobBlocksInY >>= 1;
+                }
+
+                while (mipGobBlocksInZ != 1 && d <= (mipGobBlocksInZ >> 1))
+                {
+                    mipGobBlocksInZ >>= 1;
+                }
+
+                // Rows of the linear input are aligned to HostStrideAlignment bytes.
+                int inStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
+                int wAligned = BitUtils.AlignUp(w, wAlignment);
+
+                BlockLinearLayout layout = new BlockLinearLayout(
+                    wAligned, h, d, mipGobBlocksInY, mipGobBlocksInZ, bytesPerPixel);
+
+                for (int layer = 0; layer < layers; layer++)
+                {
+                    int outBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level);
+
+                    for (int z = 0; z < d; z++)
+                    {
+                        for (int y = 0; y < h; y++)
+                        {
+                            for (int x = 0; x < w; x++)
+                            {
+                                Span<byte> target = output.Slice(outBaseOffset + layout.GetOffset(x, y, z), bytesPerPixel);
+
+                                data.Slice(inOffs + x * bytesPerPixel, bytesPerPixel).CopyTo(target);
+                            }
+
+                            inOffs += inStride;
+                        }
+                    }
+                }
+            }
+
+            return output;
+        }
+
+        /// <summary>
+        /// Converts linear texture data with host aligned rows to linear data with the specified stride.
+        /// </summary>
+        /// <param name="width">Width of the texture</param>
+        /// <param name="height">Height of the texture</param>
+        /// <param name="blockWidth">Width of a block, the texture width is divided by it rounding up</param>
+        /// <param name="blockHeight">Height of a block, the texture height is divided by it rounding up</param>
+        /// <param name="stride">Distance in bytes between the start of consecutive rows of the output</param>
+        /// <param name="bytesPerPixel">Size in bytes of each element of a row</param>
+        /// <param name="data">Input data, with rows aligned to <see cref="HostStrideAlignment"/> bytes</param>
+        /// <returns>The converted data, with a size of <paramref name="stride"/> bytes per row</returns>
+        public static Span<byte> ConvertLinearToLinearStrided(
+            int width,
+            int height,
+            int blockWidth,
+            int blockHeight,
+            int stride,
+            int bytesPerPixel,
+            ReadOnlySpan<byte> data)
+        {
+            int lineSize = BitUtils.DivRoundUp(width,  blockWidth) * bytesPerPixel;
+            int h        = BitUtils.DivRoundUp(height, blockHeight);
+            int inStride = BitUtils.AlignUp(lineSize, HostStrideAlignment);
+
+            Span<byte> output = new byte[h * stride];
+
+            // The elements of a row are contiguous on both input and output, so copy whole rows at once.
+            for (int y = 0; y < h; y++)
+            {
+                data.Slice(y * inStride, lineSize).CopyTo(output.Slice(y * stride, lineSize));
+            }
+
+            return output;
+        }
+
         private static int GetTextureSize(
             int width,
             int height,
@@ -153,7 +266,7 @@ namespace Ryujinx.Graphics.Texture
                 w = BitUtils.DivRoundUp(w, blockWidth);
                 h = BitUtils.DivRoundUp(h, blockHeight);
 
-                int stride = BitUtils.AlignUp(w * bytesPerPixel, AlignmentSize);
+                int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
 
                 layerSize += stride * h * d;
             }