diff --git a/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs b/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs
index cab1ec807..ba9eb2d1e 100644
--- a/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs
+++ b/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs
@@ -17,6 +17,9 @@ namespace Ryujinx.Graphics.Vic.Image
                 case PixelFormat.A8B8G8R8:
                     WriteA8B8G8R8(rm, input, ref config, ref offsets);
                     break;
+                case PixelFormat.A8R8G8B8:
+                    WriteA8R8G8B8(rm, input, ref config, ref offsets);
+                    break;
                 case PixelFormat.Y8___V8U8_N420:
                     WriteNv12(rm, input, ref config, ref offsets);
                     break;
@@ -116,6 +119,105 @@ namespace Ryujinx.Graphics.Vic.Image
             rm.BufferPool.Return(dstIndex);
         }
 
+        private unsafe static void WriteA8R8G8B8(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets)
+        { // Fills a rented buffer with the surface as 4 bytes per pixel in B,G,R,A byte order, then passes it to WriteBuffer.
+            int width = input.Width;
+            int height = input.Height;
+            int stride = GetPitch(width, 4); // Bytes per destination row at 4 bytes per pixel (GetPitch is defined elsewhere; presumably adds alignment padding).
+
+            int dstIndex = rm.BufferPool.Rent(height * stride, out Span<byte> dst); // dstIndex is handed back to the pool at the end of the method.
+            // Vectorized path when SSSE3 is available; the else branch below is the scalar fallback.
+            if (Ssse3.IsSupported)
+            {
+                Vector128<byte> shuffleMask = Vector128.Create( // Swaps bytes 0 and 2 of every 4-byte group; bytes 1 and 3 stay in place.
+                    (byte)2, (byte)1, (byte)0, (byte)3,
+                    (byte)6, (byte)5, (byte)4, (byte)7,
+                    (byte)10, (byte)9, (byte)8, (byte)11,
+                    (byte)14, (byte)13, (byte)12, (byte)15);
+
+                int widthTrunc = width & ~7; // Width rounded down to a multiple of 8, the number of pixels handled per SIMD iteration.
+                int strideGap = stride - width * 4; // Bytes left over at the end of each destination row after the pixel data.
+
+                fixed (Pixel* srcPtr = input.Data)
+                {
+                    Pixel* ip = srcPtr;
+
+                    fixed (byte* dstPtr = dst)
+                    {
+                        byte* op = dstPtr;
+
+                        for (int y = 0; y < height; y++, ip += input.Width) // ip advances by one source row (input.Width pixels) per iteration.
+                        {
+                            int x = 0;
+                            // Main loop: 8 pixels per iteration via four 128-bit loads (assumes Pixel is four 16-bit channels, i.e. 2 pixels per load - TODO confirm).
+                            for (; x < widthTrunc; x += 8)
+                            {
+                                Vector128<ushort> pixel12 = Sse2.LoadVector128((ushort*)(ip + (uint)x));
+                                Vector128<ushort> pixel34 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 2));
+                                Vector128<ushort> pixel56 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 4));
+                                Vector128<ushort> pixel78 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 6));
+                                // Drop the two low bits of every 16-bit lane (presumably the same reduction Downsample performs - TODO confirm).
+                                pixel12 = Sse2.ShiftRightLogical(pixel12, 2);
+                                pixel34 = Sse2.ShiftRightLogical(pixel34, 2);
+                                pixel56 = Sse2.ShiftRightLogical(pixel56, 2);
+                                pixel78 = Sse2.ShiftRightLogical(pixel78, 2);
+                                // Narrow the 16-bit lanes to bytes with unsigned saturation, merging two vectors into one.
+                                Vector128<byte> pixel1234 = Sse2.PackUnsignedSaturate(pixel12.AsInt16(), pixel34.AsInt16());
+                                Vector128<byte> pixel5678 = Sse2.PackUnsignedSaturate(pixel56.AsInt16(), pixel78.AsInt16());
+                                // Swap the first and third byte of each pixel; yields the B,G,R,A order of the scalar loops if Pixel stores R,G,B,A (TODO confirm).
+                                pixel1234 = Ssse3.Shuffle(pixel1234, shuffleMask);
+                                pixel5678 = Ssse3.Shuffle(pixel5678, shuffleMask);
+
+                                Sse2.Store(op + 0x00, pixel1234);
+                                Sse2.Store(op + 0x10, pixel5678);
+
+                                op += 0x20; // 32 bytes were written by the two stores above.
+                            }
+                            // Scalar tail for the pixels of this row left over after the 8-wide loop (at most 7).
+                            for (; x < width; x++)
+                            {
+                                Pixel* px = ip + (uint)x;
+
+                                *(op + 0) = Downsample(px->B);
+                                *(op + 1) = Downsample(px->G);
+                                *(op + 2) = Downsample(px->R);
+                                *(op + 3) = Downsample(px->A);
+
+                                op += 4;
+                            }
+                            // Skip the remaining bytes of the row so op points at the start of the next destination row.
+                            op += strideGap;
+                        }
+                    }
+                }
+            }
+            else
+            {
+                for (int y = 0; y < height; y++)
+                {
+                    int baseOffs = y * stride; // Byte offset of row y inside dst.
+
+                    for (int x = 0; x < width; x++)
+                    {
+                        int offs = baseOffs + x * 4;
+
+                        dst[offs + 0] = Downsample(input.GetB(x, y));
+                        dst[offs + 1] = Downsample(input.GetG(x, y));
+                        dst[offs + 2] = Downsample(input.GetR(x, y));
+                        dst[offs + 3] = Downsample(input.GetA(x, y));
+                    }
+                }
+            }
+
+            bool outLinear = config.OutBlkKind == 0; // A block kind of 0 is treated as linear output here.
+
+            int gobBlocksInY = 1 << config.OutBlkHeight; // OutBlkHeight is used as a log2 value.
+
+            WriteBuffer(rm, dst, offsets.LumaOffset, outLinear, width, height, 4, gobBlocksInY); // The 4 is presumably bytes per pixel, matching GetPitch above - TODO confirm against WriteBuffer.
+
+            rm.BufferPool.Return(dstIndex);
+        }
+
         private unsafe static void WriteNv12(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets)
         {
             int gobBlocksInY = 1 << config.OutBlkHeight;