From 48f6570557fc76496936514d94e3ccddf55ec633 Mon Sep 17 00:00:00 2001
From: Mary <me@thog.eu>
Date: Fri, 13 Nov 2020 00:15:34 +0100
Subject: [PATCH] Salieri: shader cache (#1701)

Here comes Salieri, my implementation of a disk shader cache!

"I'm sure you know why I named it that."
"It doesn't really mean anything."

This implementation collects shaders at runtime and caches them so they can be compiled later when starting a game.
---
 .../Configuration/ConfigurationFileFormat.cs  |   7 +-
 .../Configuration/ConfigurationState.cs       |  28 +-
 Ryujinx.Common/Hash128.cs                     |  42 ++
 Ryujinx.Common/XXHash128.cs                   | 556 +++++++++++++++
 Ryujinx.Graphics.GAL/IProgram.cs              |   5 +-
 Ryujinx.Graphics.GAL/IRenderer.cs             |   2 +
 Ryujinx.Graphics.Gpu/Engine/Compute.cs        |   2 +-
 Ryujinx.Graphics.Gpu/Engine/Methods.cs        |   4 +-
 Ryujinx.Graphics.Gpu/GpuContext.cs            |  30 +
 Ryujinx.Graphics.Gpu/GraphicsConfig.cs        |  11 +
 .../Image/TextureDescriptor.cs                |  21 +
 .../Ryujinx.Graphics.Gpu.csproj               |   2 +-
 .../Shader/Cache/CacheCollection.cs           | 595 ++++++++++++++++
 .../Shader/Cache/CacheManager.cs              | 168 +++++
 .../Cache/Definition/CacheGraphicsApi.cs      |  38 ++
 .../Shader/Cache/Definition/CacheHashType.cs  |  13 +
 .../Cache/Definition/CacheManifestHeader.cs   |  97 +++
 .../Definition/GuestGpuAccessorHeader.cs      |  62 ++
 .../Cache/Definition/GuestShaderCacheEntry.cs |  88 +++
 .../Definition/GuestShaderCacheEntryHeader.cs |  67 ++
 .../Definition/GuestShaderCacheHeader.cs      |  42 ++
 ...GuestShaderCacheTransformFeedbackHeader.cs |  38 ++
 .../Definition/GuestTextureDescriptor.cs      |  15 +
 .../Cache/Definition/HostShaderCacheEntry.cs  | 210 ++++++
 .../Definition/HostShaderCacheEntryHeader.cs  |  67 ++
 .../Cache/Definition/HostShaderCacheHeader.cs |  42 ++
 .../Shader/CachedGpuAccessor.cs               | 154 +++++
 Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs    | 100 +--
 Ryujinx.Graphics.Gpu/Shader/ShaderBundle.cs   |   2 +-
 Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs    | 641 ++++++++++++++++--
 .../Shader/ShaderCodeHolder.cs                |  10 +-
 .../TextureDescriptorCapableGpuAccessor.cs    | 104 +++
 Ryujinx.Graphics.OpenGL/Program.cs            |  37 +
 Ryujinx.Graphics.OpenGL/Renderer.cs           |  14 +
 Ryujinx.Graphics.Shader/BufferDescriptor.cs   |   4 +-
 .../Decoders/IOpCodeTexture.cs                |   2 +-
 .../Decoders/OpCodeImage.cs                   |   5 +-
 .../Decoders/OpCodeTexture.cs                 |   6 +-
 .../Decoders/OpCodeTextureBase.cs             |  14 +
 .../Decoders/OpCodeTextureScalar.cs           |   6 +-
 Ryujinx.Graphics.Shader/InputTopology.cs      |   2 +-
 .../Instructions/InstEmitTexture.cs           |  22 +-
 Ryujinx.Graphics.Shader/ShaderProgram.cs      |   5 +-
 Ryujinx.Graphics.Shader/ShaderProgramInfo.cs  |   2 +-
 Ryujinx.Graphics.Shader/ShaderStage.cs        |   6 +-
 Ryujinx.Graphics.Shader/TextureDescriptor.cs  |  12 +-
 .../Translation/ShaderConfig.cs               |  58 +-
 .../Translation/Translator.cs                 | 162 ++---
 .../Translation/TranslatorContext.cs          | 160 +++++
 Ryujinx.HLE/HOS/ApplicationLoader.cs          |  11 +
 Ryujinx.ShaderTools/Program.cs                |   2 +-
 Ryujinx/Config.json                           |   1 +
 Ryujinx/Ui/GLRenderer.cs                      |  17 +-
 Ryujinx/Ui/GameTableContextMenu.cs            |  88 ++-
 Ryujinx/Ui/MainWindow.cs                      |  57 +-
 Ryujinx/Ui/SettingsWindow.cs                  |   7 +
 Ryujinx/Ui/SettingsWindow.glade               |  22 +-
 57 files changed, 3589 insertions(+), 396 deletions(-)
 create mode 100644 Ryujinx.Common/Hash128.cs
 create mode 100644 Ryujinx.Common/XXHash128.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/CacheCollection.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/Definition/CacheGraphicsApi.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/Definition/CacheHashType.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/Definition/CacheManifestHeader.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestGpuAccessorHeader.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheEntry.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheEntryHeader.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheHeader.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheTransformFeedbackHeader.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestTextureDescriptor.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntry.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntryHeader.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheHeader.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs
 create mode 100644 Ryujinx.Graphics.Gpu/Shader/TextureDescriptorCapableGpuAccessor.cs
 create mode 100644 Ryujinx.Graphics.Shader/Decoders/OpCodeTextureBase.cs
 create mode 100644 Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs

diff --git a/Ryujinx.Common/Configuration/ConfigurationFileFormat.cs b/Ryujinx.Common/Configuration/ConfigurationFileFormat.cs
index 7ea38bac..bfb1cdc1 100644
--- a/Ryujinx.Common/Configuration/ConfigurationFileFormat.cs
+++ b/Ryujinx.Common/Configuration/ConfigurationFileFormat.cs
@@ -14,7 +14,7 @@ namespace Ryujinx.Configuration
         /// <summary>
         /// The current version of the file format
         /// </summary>
-        public const int CurrentVersion = 15;
+        public const int CurrentVersion = 16;
 
         public int Version { get; set; }
 
@@ -128,6 +128,11 @@ namespace Ryujinx.Configuration
         /// </summary>
         public bool EnableVsync { get; set; }
 
+        /// <summary>
+        /// Enables or disables Shader cache
+        /// </summary>
+        public bool EnableShaderCache { get; set; }
+
         /// <summary>
         /// Enables or disables multi-core scheduling of threads
         /// </summary>
diff --git a/Ryujinx.Common/Configuration/ConfigurationState.cs b/Ryujinx.Common/Configuration/ConfigurationState.cs
index d83d07d3..915cb77e 100644
--- a/Ryujinx.Common/Configuration/ConfigurationState.cs
+++ b/Ryujinx.Common/Configuration/ConfigurationState.cs
@@ -298,13 +298,19 @@ namespace Ryujinx.Configuration
             /// </summary>
             public ReactiveObject<bool> EnableVsync { get; private set; }
 
+            /// <summary>
+            /// Enables or disables Shader cache
+            /// </summary>
+            public ReactiveObject<bool> EnableShaderCache { get; private set; }
+
             public GraphicsSection()
             {
-                ResScale        = new ReactiveObject<int>();
-                ResScaleCustom  = new ReactiveObject<float>();
-                MaxAnisotropy   = new ReactiveObject<float>();
-                ShadersDumpPath = new ReactiveObject<string>();
-                EnableVsync     = new ReactiveObject<bool>();
+                ResScale          = new ReactiveObject<int>();
+                ResScaleCustom    = new ReactiveObject<float>();
+                MaxAnisotropy     = new ReactiveObject<float>();
+                ShadersDumpPath   = new ReactiveObject<string>();
+                EnableVsync       = new ReactiveObject<bool>();
+                EnableShaderCache = new ReactiveObject<bool>();
             }
         }
 
@@ -401,6 +407,7 @@ namespace Ryujinx.Configuration
                 EnableDiscordIntegration  = EnableDiscordIntegration,
                 CheckUpdatesOnStart       = CheckUpdatesOnStart,
                 EnableVsync               = Graphics.EnableVsync,
+                EnableShaderCache         = Graphics.EnableShaderCache,
                 EnableMulticoreScheduling = System.EnableMulticoreScheduling,
                 EnablePtc                 = System.EnablePtc,
                 EnableFsIntegrityChecks   = System.EnableFsIntegrityChecks,
@@ -461,6 +468,7 @@ namespace Ryujinx.Configuration
             EnableDiscordIntegration.Value         = true;
             CheckUpdatesOnStart.Value              = true;
             Graphics.EnableVsync.Value             = true;
+            Graphics.EnableShaderCache.Value       = true;
             System.EnableMulticoreScheduling.Value = true;
             System.EnablePtc.Value                 = false;
             System.EnableFsIntegrityChecks.Value   = true;
@@ -727,6 +735,15 @@ namespace Ryujinx.Configuration
                 configurationFileUpdated = true;
             }
 
+            if (configurationFileFormat.Version < 16)
+            {
+                Common.Logging.Logger.Warning?.Print(LogClass.Application, $"Outdated configuration version {configurationFileFormat.Version}, migrating to version 16.");
+
+                configurationFileFormat.EnableShaderCache = true;
+
+                configurationFileUpdated = true;
+            }
+
             List<InputConfig> inputConfig = new List<InputConfig>();
             inputConfig.AddRange(configurationFileFormat.ControllerConfig);
             inputConfig.AddRange(configurationFileFormat.KeyboardConfig);
@@ -753,6 +770,7 @@ namespace Ryujinx.Configuration
             EnableDiscordIntegration.Value         = configurationFileFormat.EnableDiscordIntegration;
             CheckUpdatesOnStart.Value              = configurationFileFormat.CheckUpdatesOnStart;
             Graphics.EnableVsync.Value             = configurationFileFormat.EnableVsync;
+            Graphics.EnableShaderCache.Value       = configurationFileFormat.EnableShaderCache;
             System.EnableMulticoreScheduling.Value = configurationFileFormat.EnableMulticoreScheduling;
             System.EnablePtc.Value                 = configurationFileFormat.EnablePtc;
             System.EnableFsIntegrityChecks.Value   = configurationFileFormat.EnableFsIntegrityChecks;
diff --git a/Ryujinx.Common/Hash128.cs b/Ryujinx.Common/Hash128.cs
new file mode 100644
index 00000000..99cd015c
--- /dev/null
+++ b/Ryujinx.Common/Hash128.cs
@@ -0,0 +1,42 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Common
+{
+    [StructLayout(LayoutKind.Sequential)]
+    public struct Hash128 : IEquatable<Hash128> // 128-bit hash value held as two 64-bit halves
+    {
+        public ulong Low;  // low 64 bits of the hash
+        public ulong High; // high 64 bits of the hash
+
+        public override string ToString() // 32 lowercase hex digits: High first, then Low, each zero-padded to 16
+        {
+            return $"{High:x16}{Low:x16}";
+        }
+
+        public static bool operator ==(Hash128 x, Hash128 y) // value equality, delegates to Equals(Hash128)
+        {
+            return x.Equals(y);
+        }
+
+        public static bool operator !=(Hash128 x, Hash128 y) // exact negation of operator ==
+        {
+            return !x.Equals(y);
+        }
+
+        public override bool Equals(object obj) // true only when obj is a Hash128 with the same two halves
+        {
+            return obj is Hash128 hash128 && Equals(hash128);
+        }
+
+        public bool Equals(Hash128 cmpObj) // both halves must match
+        {
+            return Low == cmpObj.Low && High == cmpObj.High;
+        }
+
+        public override int GetHashCode() // built from the same two fields Equals compares
+        {
+            return HashCode.Combine(Low, High);
+        }
+    }
+}
diff --git a/Ryujinx.Common/XXHash128.cs b/Ryujinx.Common/XXHash128.cs
new file mode 100644
index 00000000..827e4cb2
--- /dev/null
+++ b/Ryujinx.Common/XXHash128.cs
@@ -0,0 +1,556 @@
+using System;
+using System.Buffers.Binary;
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace Ryujinx.Common
+{
+    public static class XXHash128
+    {
+        private const int StripeLen = 64; // bytes of input folded in by one Xxh3Accumulate512 call
+        private const int AccNb = StripeLen / sizeof(ulong); // number of 64-bit accumulator lanes (8)
+        private const int SecretConsumeRate = 8; // secret offset advances by this many bytes per stripe
+        private const int SecretLastAccStart = 7; // extra back-offset into the secret used for the final stripe
+        private const int SecretMergeAccsStart = 11; // secret offset used when merging the accumulators
+        private const int SecretSizeMin = 136; // minimum secret length asserted by the mid/long input paths
+        private const int MidSizeStartOffset = 3; // not referenced in the visible part of the file
+        private const int MidSizeLastOffset = 17; // not referenced in the visible part of the file
+
+        private const uint Prime32_1 = 0x9E3779B1U; // 32-bit multiplier constants
+        private const uint Prime32_2 = 0x85EBCA77U;
+        private const uint Prime32_3 = 0xC2B2AE3DU;
+        private const uint Prime32_4 = 0x27D4EB2FU;
+        private const uint Prime32_5 = 0x165667B1U;
+
+        private const ulong Prime64_1 = 0x9E3779B185EBCA87UL; // 64-bit multiplier constants
+        private const ulong Prime64_2 = 0xC2B2AE3D27D4EB4FUL;
+        private const ulong Prime64_3 = 0x165667B19E3779F9UL;
+        private const ulong Prime64_4 = 0x85EBCA77C2B2AE63UL;
+        private const ulong Prime64_5 = 0x27D4EB2F165667C5UL;
+
+        private static readonly ulong[] Xxh3InitAcc = new ulong[] // starting values copied into the accumulators for long inputs
+        {
+            Prime32_3,
+            Prime64_1,
+            Prime64_2,
+            Prime64_3,
+            Prime64_4,
+            Prime32_2,
+            Prime64_5,
+            Prime32_1
+        };
+
+        private static readonly byte[] Xxh3KSecret = new byte[] // 192-byte secret table; presumably the default secret - its consumers are outside this view
+        {
+            0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
+            0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
+            0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
+            0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
+            0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
+            0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
+            0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
+            0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
+            0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
+            0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
+            0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
+            0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e
+        };
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ulong Mult32To64(ulong x, ulong y)
+        {
+            return (x & 0xFFFFFFFFUL) * (y & 0xFFFFFFFFUL); // 32x32 -> 64-bit product of the low halves; upper bits of x and y are ignored
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private unsafe static Hash128 Mult64To128(ulong lhs, ulong rhs) // full 128-bit unsigned product of two 64-bit values, returned as Low/High
+        {
+            // TODO: Use BigMul once .NET 5 lands.
+            if (Bmi2.X64.IsSupported)
+            {
+                ulong low;
+                ulong high = Bmi2.X64.MultiplyNoFlags(lhs, rhs, &low); // return value is used as the high half; the low half is written through the pointer
+                return new Hash128
+                {
+                    Low = low,
+                    High = high
+                };
+            }
+
+            ulong loLo = Mult32To64((uint)lhs, (uint)rhs); // software fallback: four 32x32 partial products
+            ulong hiLo = Mult32To64(lhs >> 32, (uint)rhs);
+            ulong loHi = Mult32To64((uint)lhs, rhs >> 32);
+            ulong hiHi = Mult32To64(lhs >> 32, rhs >> 32);
+
+            ulong cross = (loLo >> 32) + (uint)hiLo + loHi; // middle column: high half of loLo + low half of hiLo + loHi
+            ulong upper = (hiLo >> 32) + (cross >> 32) + hiHi; // carries out of the middle column land in the high word
+            ulong lower = (cross << 32) | (uint)loLo;
+
+            return new Hash128
+            {
+                Low = lower,
+                High = upper
+            };
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ulong Mul128Fold64(ulong lhs, ulong rhs)
+        {
+            Hash128 wide = Mult64To128(lhs, rhs); // 128-bit product of the two operands
+            return wide.High ^ wide.Low;          // fold it to 64 bits by xoring the halves
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ulong XorShift64(ulong v64, int shift)
+        {
+            Debug.Assert(shift >= 0 && shift <= 63); // shift must stay inside the 64-bit width
+            return (v64 >> shift) ^ v64;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ulong Xxh3Avalanche(ulong h64)
+        {
+            const ulong Multiplier = 0x165667919E3779F9UL;
+            ulong mixed = XorShift64(h64, 37) * Multiplier; // xor-shift by 37, then wrapping multiply
+            mixed = XorShift64(mixed, 32);                  // final xor-shift by 32
+            return mixed;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ulong Xxh64Avalanche(ulong h64)
+        {
+            // Two xor-shift / multiply rounds (Prime64_2, then Prime64_3) followed by a last xor-shift.
+            ulong mixed = h64;
+            mixed = (mixed ^ (mixed >> 33)) * Prime64_2;
+            mixed = (mixed ^ (mixed >> 29)) * Prime64_3;
+            mixed ^= mixed >> 32;
+            return mixed;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private unsafe static void Xxh3Accumulate512(Span<ulong> acc, ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret) // folds one StripeLen-byte stripe of input, keyed by secret, into acc
+        {
+            if (Avx2.IsSupported) // 256-bit path: StripeLen / 32 = 2 iterations
+            {
+                fixed (ulong* pAcc = acc)
+                {
+                    fixed (byte* pInput = input, pSecret = secret) // NOTE(review): the SIMD paths read StripeLen bytes through raw pointers with no span bounds checks - callers must supply that much data
+                    {
+                        Vector256<ulong>* xAcc = (Vector256<ulong>*)pAcc;
+                        Vector256<byte>* xInput = (Vector256<byte>*)pInput;
+                        Vector256<byte>* xSecret = (Vector256<byte>*)pSecret;
+
+                        for (ulong i = 0; i < StripeLen / 32; i++)
+                        {
+                            Vector256<byte> dataVec = xInput[i];
+                            Vector256<byte> keyVec = xSecret[i];
+                            Vector256<byte> dataKey = Avx2.Xor(dataVec, keyVec); // input xor secret
+                            Vector256<uint> dataKeyLo = Avx2.Shuffle(dataKey.AsUInt32(), 0b00110001);
+                            Vector256<ulong> product = Avx2.Multiply(dataKey.AsUInt32(), dataKeyLo); // 32x32 -> 64 products, as in the scalar Mult32To64 call below
+                            Vector256<uint> dataSwap = Avx2.Shuffle(dataVec.AsUInt32(), 0b01001110); // presumably the SIMD counterpart of the scalar acc[i ^ 1] += dataVal
+                            Vector256<ulong> sum = Avx2.Add(xAcc[i], dataSwap.AsUInt64());
+                            xAcc[i] = Avx2.Add(product, sum);
+                        }
+                    }
+                }
+            }
+            else if (Sse2.IsSupported) // 128-bit path: StripeLen / 16 = 4 iterations, same steps as above
+            {
+                fixed (ulong* pAcc = acc)
+                {
+                    fixed (byte* pInput = input, pSecret = secret)
+                    {
+                        Vector128<ulong>* xAcc = (Vector128<ulong>*)pAcc;
+                        Vector128<byte>* xInput = (Vector128<byte>*)pInput;
+                        Vector128<byte>* xSecret = (Vector128<byte>*)pSecret;
+
+                        for (ulong i = 0; i < StripeLen / 16; i++)
+                        {
+                            Vector128<byte> dataVec = xInput[i];
+                            Vector128<byte> keyVec = xSecret[i];
+                            Vector128<byte> dataKey = Sse2.Xor(dataVec, keyVec);
+                            Vector128<uint> dataKeyLo = Sse2.Shuffle(dataKey.AsUInt32(), 0b00110001);
+                            Vector128<ulong> product = Sse2.Multiply(dataKey.AsUInt32(), dataKeyLo);
+                            Vector128<uint> dataSwap = Sse2.Shuffle(dataVec.AsUInt32(), 0b01001110);
+                            Vector128<ulong> sum = Sse2.Add(xAcc[i], dataSwap.AsUInt64());
+                            xAcc[i] = Sse2.Add(product, sum);
+                        }
+                    }
+                }
+            }
+            else // scalar path: one 64-bit lane per iteration, AccNb lanes in total
+            {
+                for (int i = 0; i < AccNb; i++)
+                {
+                    ulong dataVal = BinaryPrimitives.ReadUInt64LittleEndian(input.Slice(i * sizeof(ulong)));
+                    ulong dataKey = dataVal ^ BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(i * sizeof(ulong)));
+                    acc[i ^ 1] += dataVal; // raw input word is added to the neighbouring lane of the pair
+                    acc[i] += Mult32To64((uint)dataKey, dataKey >> 32); // low half times high half of the keyed word
+                }
+            }
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private unsafe static void Xxh3ScrambleAcc(Span<ulong> acc, ReadOnlySpan<byte> secret) // re-mixes every accumulator lane with the secret; called after each full block
+        {
+            if (Avx2.IsSupported) // 256-bit path: StripeLen / 32 = 2 iterations
+            {
+                fixed (ulong* pAcc = acc)
+                {
+                    fixed (byte* pSecret = secret)
+                    {
+                        Vector256<uint> prime32 = Vector256.Create(Prime32_1);
+                        Vector256<ulong>* xAcc = (Vector256<ulong>*)pAcc;
+                        Vector256<byte>* xSecret = (Vector256<byte>*)pSecret;
+
+                        for (ulong i = 0; i < StripeLen / 32; i++)
+                        {
+                            Vector256<ulong> accVec = xAcc[i];
+                            Vector256<ulong> shifted = Avx2.ShiftRightLogical(accVec, 47);
+                            Vector256<ulong> dataVec = Avx2.Xor(accVec, shifted); // acc ^ (acc >> 47), as XorShift64(acc64, 47) in the scalar path
+
+                            Vector256<byte> keyVec = xSecret[i];
+                            Vector256<uint> dataKey = Avx2.Xor(dataVec.AsUInt32(), keyVec.AsUInt32());
+
+                            Vector256<uint> dataKeyHi = Avx2.Shuffle(dataKey.AsUInt32(), 0b00110001);
+                            Vector256<ulong> prodLo = Avx2.Multiply(dataKey, prime32); // the 64-bit multiply by Prime32_1 is assembled from two 32x32 products
+                            Vector256<ulong> prodHi = Avx2.Multiply(dataKeyHi, prime32);
+
+                            xAcc[i] = Avx2.Add(prodLo, Avx2.ShiftLeftLogical(prodHi, 32));
+                        }
+                    }
+                }
+            }
+            else if (Sse2.IsSupported) // 128-bit path: StripeLen / 16 = 4 iterations, same steps as above
+            {
+                fixed (ulong* pAcc = acc)
+                {
+                    fixed (byte* pSecret = secret)
+                    {
+                        Vector128<uint> prime32 = Vector128.Create(Prime32_1);
+                        Vector128<ulong>* xAcc = (Vector128<ulong>*)pAcc;
+                        Vector128<byte>* xSecret = (Vector128<byte>*)pSecret;
+
+                        for (ulong i = 0; i < StripeLen / 16; i++)
+                        {
+                            Vector128<ulong> accVec = xAcc[i];
+                            Vector128<ulong> shifted = Sse2.ShiftRightLogical(accVec, 47);
+                            Vector128<ulong> dataVec = Sse2.Xor(accVec, shifted);
+
+                            Vector128<byte> keyVec = xSecret[i];
+                            Vector128<uint> dataKey = Sse2.Xor(dataVec.AsUInt32(), keyVec.AsUInt32());
+
+                            Vector128<uint> dataKeyHi = Sse2.Shuffle(dataKey.AsUInt32(), 0b00110001);
+                            Vector128<ulong> prodLo = Sse2.Multiply(dataKey, prime32);
+                            Vector128<ulong> prodHi = Sse2.Multiply(dataKeyHi, prime32);
+
+                            xAcc[i] = Sse2.Add(prodLo, Sse2.ShiftLeftLogical(prodHi, 32));
+                        }
+                    }
+                }
+            }
+            else // scalar path: xor-shift by 47, xor with the secret word, multiply by Prime32_1
+            {
+                for (int i = 0; i < AccNb; i++)
+                {
+                    ulong key64 = BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(i * sizeof(ulong)));
+                    ulong acc64 = acc[i];
+                    acc64 = XorShift64(acc64, 47);
+                    acc64 ^= key64;
+                    acc64 *= Prime32_1;
+                    acc[i] = acc64;
+                }
+            }
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static void Xxh3Accumulate(Span<ulong> acc, ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, int nbStripes)
+        {
+            for (int stripe = 0; stripe < nbStripes; stripe++) // input advances StripeLen bytes per stripe, the secret SecretConsumeRate bytes
+            {
+                ReadOnlySpan<byte> stripeInput = input.Slice(stripe * StripeLen);
+                Xxh3Accumulate512(acc, stripeInput, secret.Slice(stripe * SecretConsumeRate));
+            }
+        }
+
+        private static void Xxh3HashLongInternalLoop(Span<ulong> acc, ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret) // accumulates the whole input into acc: full blocks, remaining stripes, then the last stripe
+        {
+            int nbStripesPerBlock = (secret.Length - StripeLen) / SecretConsumeRate; // stripes that can be keyed before the secret offset would run out
+            int blockLen = StripeLen * nbStripesPerBlock;
+            int nbBlocks = (input.Length - 1) / blockLen; // Length - 1 keeps at least one byte for the tail handling below
+
+            Debug.Assert(secret.Length >= SecretSizeMin);
+
+            for (int n = 0; n < nbBlocks; n++)
+            {
+                Xxh3Accumulate(acc, input.Slice(n * blockLen), secret, nbStripesPerBlock);
+                Xxh3ScrambleAcc(acc, secret.Slice(secret.Length - StripeLen)); // scramble with the last StripeLen bytes of the secret after every full block
+            }
+
+            Debug.Assert(input.Length > StripeLen);
+
+            int nbStripes = (input.Length - 1 - (blockLen * nbBlocks)) / StripeLen; // whole stripes left in the partial last block
+            Debug.Assert(nbStripes <= (secret.Length / SecretConsumeRate));
+            Xxh3Accumulate(acc, input.Slice(nbBlocks * blockLen), secret, nbStripes);
+
+            ReadOnlySpan<byte> p = input.Slice(input.Length - StripeLen); // final StripeLen bytes of the input; may overlap the stripes processed above
+            Xxh3Accumulate512(acc, p, secret.Slice(secret.Length - StripeLen - SecretLastAccStart));
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ulong Xxh3Mix2Accs(Span<ulong> acc, ReadOnlySpan<byte> secret)
+        {
+            ulong lane0 = acc[0] ^ BinaryPrimitives.ReadUInt64LittleEndian(secret);          // first lane keyed with secret bytes 0..7
+            ulong lane1 = acc[1] ^ BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(8)); // second lane keyed with secret bytes 8..15
+            return Mul128Fold64(lane0, lane1);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static ulong Xxh3MergeAccs(Span<ulong> acc, ReadOnlySpan<byte> secret, ulong start)
+        {
+            ulong folded = start; // running sum, seeded by the caller
+
+            for (int pair = 0; pair < 4; pair++) // four lane pairs, each keyed with its own 16 secret bytes
+            {
+                folded += Xxh3Mix2Accs(acc.Slice(pair * 2), secret.Slice(pair * 16));
+            }
+
+            return Xxh3Avalanche(folded);
+        }
+
+        private static Hash128 Xxh3HashLong128bInternal(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret) // 128-bit hash of a long input: accumulate everything, then merge the lanes twice
+        {
+            Span<ulong> acc = stackalloc ulong[AccNb]; // TODO: Use SkipLocalsInit attribute once .NET 5 lands.
+            Xxh3InitAcc.CopyTo(acc); // start from the fixed initial accumulator values
+
+            Xxh3HashLongInternalLoop(acc, input, secret);
+
+            Debug.Assert(acc.Length == 8);
+            Debug.Assert(secret.Length >= acc.Length * sizeof(ulong) + SecretMergeAccsStart);
+
+            return new Hash128
+            {
+                Low = Xxh3MergeAccs(acc, secret.Slice(SecretMergeAccsStart), (ulong)input.Length * Prime64_1), // low half: secret window near the start, seeded with length * Prime64_1
+                High = Xxh3MergeAccs( // high half: secret window measured back from the end, seeded with ~(length * Prime64_2)
+                    acc,
+                    secret.Slice(secret.Length - acc.Length * sizeof(ulong) - SecretMergeAccsStart),
+                    ~((ulong)input.Length * Prime64_2))
+            };
+        }
+
+        private static Hash128 Xxh3Len1To3128b(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed) // 128-bit hash for inputs of 1 to 3 bytes
+        {
+            Debug.Assert(1 <= input.Length && input.Length <= 3);
+
+            byte c1 = input[0]; // first byte
+            byte c2 = input[input.Length >> 1]; // middle byte (index 0 for a 1-byte input, 1 otherwise)
+            byte c3 = input[^1]; // last byte; for a 1-byte input all three are the same byte
+
+            uint combinedL = ((uint)c1 << 16) | ((uint)c2 << 24) | c3 | ((uint)input.Length << 8); // packs the three bytes and the length into one 32-bit word
+            uint combinedH = BitOperations.RotateLeft(BinaryPrimitives.ReverseEndianness(combinedL), 13);
+            ulong bitFlipL = (BinaryPrimitives.ReadUInt32LittleEndian(secret) ^ BinaryPrimitives.ReadUInt32LittleEndian(secret.Slice(4))) + seed; // keys come from secret bytes 0..15; seed is added for Low ...
+            ulong bitFlipH = (BinaryPrimitives.ReadUInt32LittleEndian(secret.Slice(8)) ^ BinaryPrimitives.ReadUInt32LittleEndian(secret.Slice(12))) - seed; // ... and subtracted for High
+            ulong keyedLo = combinedL ^ bitFlipL;
+            ulong keyedHi = combinedH ^ bitFlipH;
+
+            return new Hash128
+            {
+                Low = Xxh64Avalanche(keyedLo),
+                High = Xxh64Avalanche(keyedHi)
+            };
+        }
+
+        private static Hash128 Xxh3Len4To8128b(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed) // 128-bit hash for inputs of 4 to 8 bytes
+        {
+            Debug.Assert(4 <= input.Length && input.Length <= 8);
+            // Widen to ulong BEFORE shifting: a uint shifted by 32 uses a masked count of 0 in C#, which left the swapped seed in the low half.
+            seed ^= (ulong)BinaryPrimitives.ReverseEndianness((uint)seed) << 32;
+            // The first and last 4 bytes (overlapping when Length < 8) form a single 64-bit word.
+            uint inputLo = BinaryPrimitives.ReadUInt32LittleEndian(input);
+            uint inputHi = BinaryPrimitives.ReadUInt32LittleEndian(input.Slice(input.Length - 4));
+            ulong input64 = inputLo + ((ulong)inputHi << 32);
+            ulong bitFlip = (BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(16)) ^ BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(24))) + seed;
+            ulong keyed = input64 ^ bitFlip;
+            // The multiplier also encodes the input length, so inputs of different lengths are multiplied differently.
+            Hash128 m128 = Mult64To128(keyed, Prime64_1 + ((ulong)input.Length << 2));
+            // Cross-mix the two halves of the product before the final per-half scrambling.
+            m128.High += m128.Low << 1;
+            m128.Low ^= m128.High >> 3;
+            // Low half: xor-shift / multiply / xor-shift; High half: standard XXH3 avalanche.
+            m128.Low = XorShift64(m128.Low, 35);
+            m128.Low *= 0x9FB21C651E98DF25UL;
+            m128.Low = XorShift64(m128.Low, 28);
+            m128.High = Xxh3Avalanche(m128.High);
+            return m128;
+        }
+
+        private static Hash128 Xxh3Len9To16128b(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed) // 128-bit hash for inputs of 9 to 16 bytes
+        {
+            Debug.Assert(9 <= input.Length && input.Length <= 16);
+
+            ulong bitFlipL = (BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(32)) ^ BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(40))) - seed; // keys come from secret bytes 32..63
+            ulong bitFlipH = (BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(48)) ^ BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(56))) + seed;
+            ulong inputLo = BinaryPrimitives.ReadUInt64LittleEndian(input); // first 8 bytes
+            ulong inputHi = BinaryPrimitives.ReadUInt64LittleEndian(input.Slice(input.Length - 8)); // last 8 bytes; overlaps inputLo when Length < 16
+
+            Hash128 m128 = Mult64To128(inputLo ^ inputHi ^ bitFlipL, Prime64_1);
+            m128.Low += ((ulong)input.Length - 1) << 54; // folds the input length into the top bits of the low half
+            inputHi ^= bitFlipH;
+            m128.High += inputHi + Mult32To64((uint)inputHi, Prime32_2 - 1);
+            m128.Low ^= BinaryPrimitives.ReverseEndianness(m128.High);
+
+            Hash128 h128 = Mult64To128(m128.Low, Prime64_2); // second 128-bit multiply, then avalanche each half
+            h128.High += m128.High * Prime64_2;
+            h128.Low = Xxh3Avalanche(h128.Low);
+            h128.High = Xxh3Avalanche(h128.High);
+            return h128;
+        }
+
+        private static Hash128 Xxh3Len0To16128b(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed) // dispatches inputs of 0 to 16 bytes to the matching short-input routine
+        {
+            Debug.Assert(input.Length <= 16);
+
+            if (input.Length > 8) // 9..16 bytes
+            {
+                return Xxh3Len9To16128b(input, secret, seed);
+            }
+            else if (input.Length >= 4) // 4..8 bytes
+            {
+                return Xxh3Len4To8128b(input, secret, seed);
+            }
+            else if (input.Length != 0) // 1..3 bytes
+            {
+                return Xxh3Len1To3128b(input, secret, seed);
+            }
+            else // empty input: the hash depends only on the seed and secret bytes 64..95
+            {
+                Hash128 h128 = new Hash128();
+                ulong bitFlipL = BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(64)) ^ BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(72));
+                ulong bitFlipH = BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(80)) ^ BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(88));
+                h128.Low = Xxh64Avalanche(seed ^ bitFlipL);
+                h128.High = Xxh64Avalanche(seed ^ bitFlipH);
+                return h128;
+            }
+        }
+
+        private static ulong Xxh3Mix16b(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed)
+        {
+            ulong dataLo = BinaryPrimitives.ReadUInt64LittleEndian(input);                 // input bytes 0..7
+            ulong dataHi = BinaryPrimitives.ReadUInt64LittleEndian(input.Slice(8));        // input bytes 8..15
+            ulong keyLo = BinaryPrimitives.ReadUInt64LittleEndian(secret) + seed;          // seed added to the first key word
+            ulong keyHi = BinaryPrimitives.ReadUInt64LittleEndian(secret.Slice(8)) - seed; // seed subtracted from the second
+            return Mul128Fold64(dataLo ^ keyLo, dataHi ^ keyHi);
+        }
+
+        private static Hash128 Xxh128Mix32b(Hash128 acc, ReadOnlySpan<byte> input, ReadOnlySpan<byte> input2, ReadOnlySpan<byte> secret, ulong seed) // mixes two 16-byte chunks into a copy of acc and returns the updated copy
+        {
+            acc.Low += Xxh3Mix16b(input, secret, seed); // Low takes the keyed mix of the first chunk ...
+            acc.Low ^= BinaryPrimitives.ReadUInt64LittleEndian(input2) + BinaryPrimitives.ReadUInt64LittleEndian(input2.Slice(8)); // ... and the raw words of the second
+            acc.High += Xxh3Mix16b(input2, secret.Slice(16), seed); // High does the reverse, keyed with the next 16 secret bytes
+            acc.High ^= BinaryPrimitives.ReadUInt64LittleEndian(input) + BinaryPrimitives.ReadUInt64LittleEndian(input.Slice(8));
+            return acc;
+        }
+
+        /// <summary>
+        /// Hash an input whose length is between 17 and 128 bytes (inclusive).
+        /// </summary>
+        /// <param name="input">The data to hash</param>
+        /// <param name="secret">The secret to use (at least SecretSizeMin bytes)</param>
+        /// <param name="seed">The seed to use</param>
+        /// <returns>The 128-bit hash of the input</returns>
+        private static Hash128 Xxh3Len17To128128b(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed)
+        {
+            Debug.Assert(secret.Length >= SecretSizeMin);
+            Debug.Assert(16 < input.Length && input.Length <= 128);
+
+            Hash128 acc = new Hash128
+            {
+                Low = (ulong)input.Length * Prime64_1,
+                High = 0
+            };
+
+            // Each mix pairs 16 bytes located from the start of the input with 16 bytes located from its end.
+            // The conditions are nested, so longer inputs also go through every mix applied to shorter ones.
+            if (input.Length > 32)
+            {
+                if (input.Length > 64)
+                {
+                    if (input.Length > 96)
+                    {
+                        acc = Xxh128Mix32b(acc, input.Slice(48), input.Slice(input.Length - 64), secret.Slice(96), seed);
+                    }
+                    acc = Xxh128Mix32b(acc, input.Slice(32), input.Slice(input.Length - 48), secret.Slice(64), seed);
+                }
+                acc = Xxh128Mix32b(acc, input.Slice(16), input.Slice(input.Length - 32), secret.Slice(32), seed);
+            }
+            // Always performed: first 16 bytes paired with the last 16 bytes.
+            acc = Xxh128Mix32b(acc, input, input.Slice(input.Length - 16), secret, seed);
+
+            // Combine both halves of the accumulator, then avalanche each half of the result.
+            Hash128 h128 = new Hash128
+            {
+                Low = acc.Low + acc.High,
+                High = acc.Low * Prime64_1 + acc.High * Prime64_4 + ((ulong)input.Length - seed) * Prime64_2
+            };
+            h128.Low = Xxh3Avalanche(h128.Low);
+            // The high half is stored negated (two's complement).
+            h128.High = 0UL - Xxh3Avalanche(h128.High);
+            return h128;
+        }
+
+        /// <summary>
+        /// Hash an input whose length is between 129 and 240 bytes (inclusive).
+        /// </summary>
+        /// <param name="input">The data to hash</param>
+        /// <param name="secret">The secret to use (at least SecretSizeMin bytes)</param>
+        /// <param name="seed">The seed to use</param>
+        /// <returns>The 128-bit hash of the input</returns>
+        private static Hash128 Xxh3Len129To240128b(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed)
+        {
+            Debug.Assert(secret.Length >= SecretSizeMin);
+            Debug.Assert(128 < input.Length && input.Length <= 240);
+
+            Hash128 acc = new Hash128();
+
+            // Number of complete 32-byte rounds available in the input.
+            int nbRounds = input.Length / 32;
+            acc.Low = (ulong)input.Length * Prime64_1;
+            acc.High = 0;
+
+            // The first four rounds consume the first 128 bytes of the input, using the start of the secret.
+            for (int i = 0; i < 4; i++)
+            {
+                acc = Xxh128Mix32b(acc, input.Slice(32 * i), input.Slice(32 * i + 16), secret.Slice(32 * i), seed);
+            }
+
+            acc.Low = Xxh3Avalanche(acc.Low);
+            acc.High = Xxh3Avalanche(acc.High);
+            Debug.Assert(nbRounds >= 4);
+
+            // Remaining complete rounds read the secret again, starting at MidSizeStartOffset.
+            for (int i = 4; i < nbRounds; i++)
+            {
+                acc = Xxh128Mix32b(acc, input.Slice(32 * i), input.Slice(32 * i + 16), secret.Slice(MidSizeStartOffset + 32 * (i - 4)), seed);
+            }
+
+            // Last round: always mixes the final 32 bytes of the input (last 16 bytes first), with the seed negated.
+            acc = Xxh128Mix32b(acc, input.Slice(input.Length - 16), input.Slice(input.Length - 32), secret.Slice(SecretSizeMin - MidSizeLastOffset - 16), 0UL - seed);
+
+            // Combine both halves of the accumulator, then avalanche each half of the result.
+            Hash128 h128 = new Hash128
+            {
+                Low = acc.Low + acc.High,
+                High = acc.Low * Prime64_1 + acc.High * Prime64_4 + ((ulong)input.Length - seed) * Prime64_2
+            };
+            h128.Low = Xxh3Avalanche(h128.Low);
+            // The high half is stored negated (two's complement).
+            h128.High = 0UL - Xxh3Avalanche(h128.High);
+            return h128;
+        }
+
+        /// <summary>
+        /// Hash the given input, selecting the implementation matching the input length.
+        /// </summary>
+        /// <remarks>
+        /// The seed is not forwarded to <c>Xxh3HashLong128bInternal</c>, it is only used for inputs of 240 bytes or less.
+        /// </remarks>
+        /// <param name="input">The data to hash</param>
+        /// <param name="secret">The secret to use (at least SecretSizeMin bytes)</param>
+        /// <param name="seed">The seed to use</param>
+        /// <returns>The 128-bit hash of the input</returns>
+        private static Hash128 Xxh3128bitsInternal(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret, ulong seed)
+        {
+            Debug.Assert(secret.Length >= SecretSizeMin);
+
+            int length = input.Length;
+
+            if (length <= 16)
+            {
+                return Xxh3Len0To16128b(input, secret, seed);
+            }
+
+            if (length <= 128)
+            {
+                return Xxh3Len17To128128b(input, secret, seed);
+            }
+
+            if (length <= 240)
+            {
+                return Xxh3Len129To240128b(input, secret, seed);
+            }
+
+            return Xxh3HashLong128bInternal(input, secret);
+        }
+
+        /// <summary>
+        /// Compute the 128-bit hash of the given input, using the <c>Xxh3KSecret</c> secret and a seed of zero.
+        /// </summary>
+        /// <param name="input">The data to hash</param>
+        /// <returns>The 128-bit hash of the input</returns>
+        public static Hash128 ComputeHash(ReadOnlySpan<byte> input) => Xxh3128bitsInternal(input, Xxh3KSecret, 0UL);
+    }
+}
diff --git a/Ryujinx.Graphics.GAL/IProgram.cs b/Ryujinx.Graphics.GAL/IProgram.cs
index ef44fc47..5ab8346f 100644
--- a/Ryujinx.Graphics.GAL/IProgram.cs
+++ b/Ryujinx.Graphics.GAL/IProgram.cs
@@ -2,5 +2,8 @@ using System;
 
 namespace Ryujinx.Graphics.GAL
 {
-    public interface IProgram : IDisposable { }
+    public interface IProgram : IDisposable
+    {
+        /// <summary>
+        /// Gets the program as a raw byte array.
+        /// </summary>
+        /// <remarks>
+        /// NOTE(review): presumably the returned data is what IRenderer.LoadProgramBinary expects as input — confirm with the implementations.
+        /// </remarks>
+        /// <returns>The binary data of the program</returns>
+        byte[] GetBinary();
+    }
 }
diff --git a/Ryujinx.Graphics.GAL/IRenderer.cs b/Ryujinx.Graphics.GAL/IRenderer.cs
index 35c2146f..465c8805 100644
--- a/Ryujinx.Graphics.GAL/IRenderer.cs
+++ b/Ryujinx.Graphics.GAL/IRenderer.cs
@@ -27,6 +27,8 @@ namespace Ryujinx.Graphics.GAL
 
         Capabilities GetCapabilities();
 
+        IProgram LoadProgramBinary(byte[] programBinary);
+
         void SetBufferData(BufferHandle buffer, int offset, ReadOnlySpan<byte> data);
 
         void UpdateCounters();
diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute.cs b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
index cd5002ca..fd3114a7 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Compute.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
@@ -59,7 +59,7 @@ namespace Ryujinx.Graphics.Gpu.Engine
             TextureManager.SetComputeTexturePool(texturePool.Address.Pack(), texturePool.MaximumId);
             TextureManager.SetComputeTextureBufferIndex(state.Get<int>(MethodOffset.TextureBufferIndex));
 
-            ShaderProgramInfo info = cs.Shaders[0].Program.Info;
+            ShaderProgramInfo info = cs.Shaders[0].Info;
 
             for (int index = 0; index < info.CBuffers.Count; index++)
             {
diff --git a/Ryujinx.Graphics.Gpu/Engine/Methods.cs b/Ryujinx.Graphics.Gpu/Engine/Methods.cs
index cab125b5..f408561a 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Methods.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Methods.cs
@@ -1000,14 +1000,14 @@ namespace Ryujinx.Graphics.Gpu.Engine
 
             ShaderBundle gs = ShaderCache.GetGraphicsShader(state, addresses);
 
-            _vsUsesInstanceId = gs.Shaders[0]?.Program.Info.UsesInstanceId ?? false;
+            _vsUsesInstanceId = gs.Shaders[0]?.Info.UsesInstanceId ?? false;
 
             int storageBufferBindingsCount = 0;
             int uniformBufferBindingsCount = 0;
 
             for (int stage = 0; stage < Constants.ShaderStages; stage++)
             {
-                ShaderProgramInfo info = gs.Shaders[stage]?.Program.Info;
+                ShaderProgramInfo info = gs.Shaders[stage]?.Info;
 
                 _currentProgramInfo[stage] = info;
 
diff --git a/Ryujinx.Graphics.Gpu/GpuContext.cs b/Ryujinx.Graphics.Gpu/GpuContext.cs
index bdbf77a6..b62b5f3a 100644
--- a/Ryujinx.Graphics.Gpu/GpuContext.cs
+++ b/Ryujinx.Graphics.Gpu/GpuContext.cs
@@ -1,9 +1,11 @@
+using Ryujinx.Common.Configuration;
 using Ryujinx.Graphics.GAL;
 using Ryujinx.Graphics.Gpu.Engine;
 using Ryujinx.Graphics.Gpu.Engine.GPFifo;
 using Ryujinx.Graphics.Gpu.Memory;
 using Ryujinx.Graphics.Gpu.Synchronization;
 using System;
+using System.Threading;
 
 namespace Ryujinx.Graphics.Gpu
 {
@@ -12,6 +14,16 @@ namespace Ryujinx.Graphics.Gpu
     /// </summary>
     public sealed class GpuContext : IDisposable
     {
+        /// <summary>
+        /// Event signaled when the host emulation context is ready to be used by the gpu context.
+        /// </summary>
+        public ManualResetEvent HostInitalized { get; }
+
+        /// <summary>
+        /// Event signaled when the gpu context is ready to be used.
+        /// </summary>
+        public ManualResetEvent ReadyEvent { get; }
+
         /// <summary>
         /// Host renderer.
         /// </summary>
@@ -79,6 +91,22 @@ namespace Ryujinx.Graphics.Gpu
             Window = new Window(this);
 
             _caps = new Lazy<Capabilities>(Renderer.GetCapabilities);
+
+            HostInitalized = new ManualResetEvent(false);
+            ReadyEvent = new ManualResetEvent(false);
+        }
+
+        /// <summary>
+        /// Initialize the GPU emulation context.
+        /// </summary>
+        /// <remarks>
+        /// This blocks the calling thread until <see cref="HostInitalized"/> is signaled, and signals <see cref="ReadyEvent"/> once the initialization is done.
+        /// </remarks>
+        /// <param name="logLevel">The log level required.</param>
+        public void Initialize(GraphicsDebugLevel logLevel)
+        {
+            // Nothing can be initialized before the host signals that it is ready.
+            HostInitalized.WaitOne();
+
+            Renderer.Initialize(logLevel);
+            Methods.ShaderCache.Initialize();
+            // Unblock anyone waiting for the GPU context to be usable.
+            ReadyEvent.Set();
         }
 
         /// <summary>
@@ -113,6 +141,8 @@ namespace Ryujinx.Graphics.Gpu
             Methods.TextureManager.Dispose();
             Renderer.Dispose();
             GPFifo.Dispose();
+            HostInitalized.Dispose();
+            ReadyEvent.Dispose();
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/GraphicsConfig.cs b/Ryujinx.Graphics.Gpu/GraphicsConfig.cs
index 4d16628f..af980e77 100644
--- a/Ryujinx.Graphics.Gpu/GraphicsConfig.cs
+++ b/Ryujinx.Graphics.Gpu/GraphicsConfig.cs
@@ -32,5 +32,16 @@ namespace Ryujinx.Graphics.Gpu
         /// Enables or disables the Just-in-Time compiler for GPU Macro code.
         /// </summary>
         public static bool EnableMacroJit = true;
+
+        /// <summary>
+        /// Title id of the current running game.
+        /// Used by the shader cache.
+        /// </summary>
+        public static string TitleId;
+
+        /// <summary>
+        /// Enables or disables the shader cache.
+        /// </summary>
+        public static bool EnableShaderCache;
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Image/TextureDescriptor.cs b/Ryujinx.Graphics.Gpu/Image/TextureDescriptor.cs
index 313b8e20..74fb9887 100644
--- a/Ryujinx.Graphics.Gpu/Image/TextureDescriptor.cs
+++ b/Ryujinx.Graphics.Gpu/Image/TextureDescriptor.cs
@@ -1,3 +1,5 @@
+using Ryujinx.Graphics.Gpu.Shader.Cache.Definition;
+
 namespace Ryujinx.Graphics.Gpu.Image
 {
     /// <summary>
@@ -227,5 +229,24 @@ namespace Ryujinx.Graphics.Gpu.Image
         {
             return (TextureMsaaMode)((Word7 >> 8) & 0xf);
         }
+
+        /// <summary>
+        /// Build the shader cache representation of this TextureDescriptor.
+        /// </summary>
+        /// <returns>A descriptor for the shader cache, with Word0 zeroed and the low 16 bits of Word2 cleared</returns>
+        public GuestTextureDescriptor ToCache()
+        {
+            var addressless = this;
+
+            // Clear the virtual address
+            addressless.Word0 = 0;
+            addressless.Word2 &= 0xFFFF0000;
+
+            return new GuestTextureDescriptor
+            {
+                Handle = uint.MaxValue,
+                Descriptor = addressless
+            };
+        }
     }
 }
diff --git a/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj b/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj
index a9e81be3..f40857b4 100644
--- a/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj
+++ b/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj
@@ -1,4 +1,4 @@
-<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
 
   <ItemGroup>
     <ProjectReference Include="..\Ryujinx.Cpu\Ryujinx.Cpu.csproj" />
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheCollection.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheCollection.cs
new file mode 100644
index 00000000..effd893a
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheCollection.cs
@@ -0,0 +1,595 @@
+using Ryujinx.Common;
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Gpu.Shader.Cache.Definition;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.IO.Compression;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Gpu.Shader.Cache
+{
+    /// <summary>
+    /// Represent a cache collection handling one shader cache.
+    /// </summary>
+    class CacheCollection : IDisposable
+    {
+        /// <summary>
+        /// Operations that can be queued on the <see cref="_fileWriterWorkerQueue"/>.
+        /// </summary>
+        private enum CacheFileOperation
+        {
+            /// <summary>
+            /// Save a new entry in the temp cache.
+            /// The task data is a <see cref="CacheFileSaveEntryTaskData"/>.
+            /// </summary>
+            SaveTempEntry,
+
+            /// <summary>
+            /// Save the hash manifest.
+            /// </summary>
+            SaveManifest,
+
+            /// <summary>
+            /// Flush the temporary cache to the archive.
+            /// </summary>
+            FlushToArchive,
+
+            /// <summary>
+            /// Signal the <see cref="ManualResetEvent"/> given as task data when hitting this point.
+            /// This is useful to know if all previous operations were performed.
+            /// </summary>
+            Synchronize
+        }
+
+        /// <summary>
+        /// Represents an operation to perform on the <see cref="_fileWriterWorkerQueue"/>.
+        /// </summary>
+        private class CacheFileOperationTask
+        {
+            /// <summary>
+            /// The type of operation to perform.
+            /// </summary>
+            public CacheFileOperation Type;
+
+            /// <summary>
+            /// The data associated to this operation or null.
+            /// This is a <see cref="CacheFileSaveEntryTaskData"/> for <see cref="CacheFileOperation.SaveTempEntry"/>
+            /// and a <see cref="ManualResetEvent"/> for <see cref="CacheFileOperation.Synchronize"/>.
+            /// </summary>
+            public object Data;
+        }
+
+        /// <summary>
+        /// Data associated to the <see cref="CacheFileOperation.SaveTempEntry"/> operation.
+        /// </summary>
+        private class CacheFileSaveEntryTaskData
+        {
+            /// <summary>
+            /// The key of the entry to cache, also used to name the temporary file.
+            /// </summary>
+            public Hash128 Key;
+
+            /// <summary>
+            /// The value of the entry to cache, written as is to the temporary file.
+            /// </summary>
+            public byte[] Value;
+        }
+
+        /// <summary>
+        /// The directory of the shader cache.
+        /// </summary>
+        private readonly string _cacheDirectory;
+
+        /// <summary>
+        /// The version of the cache.
+        /// </summary>
+        private readonly ulong _version;
+
+        /// <summary>
+        /// The hash type of the cache.
+        /// </summary>
+        private readonly CacheHashType _hashType;
+
+        /// <summary>
+        /// The graphics API of the cache.
+        /// </summary>
+        private readonly CacheGraphicsApi _graphicsApi;
+
+        /// <summary>
+        /// The table of all the hash registered in the cache.
+        /// </summary>
+        private HashSet<Hash128> _hashTable;
+
+        /// <summary>
+        /// The queue of operations to be performed by the file writer worker.
+        /// </summary>
+        private AsyncWorkQueue<CacheFileOperationTask> _fileWriterWorkerQueue;
+
+        /// <summary>
+        /// Main storage of the cache collection.
+        /// </summary>
+        private ZipArchive _cacheArchive;
+
+        /// <summary>
+        /// Snapshot of the hash table, copied at the time of the call.
+        /// </summary>
+        /// <remarks>
+        /// The copy is made under the hash table lock, like every other access to the table,
+        /// so that it cannot be enumerated while another thread is adding or removing entries.
+        /// </remarks>
+        public ReadOnlySpan<Hash128> HashTable
+        {
+            get
+            {
+                lock (_hashTable)
+                {
+                    return _hashTable.ToArray();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Get the path of the "temp" directory located inside the cache directory.
+        /// </summary>
+        /// <returns>The temp path to the cache data directory</returns>
+        private string GetCacheTempDataPath()
+        {
+            return Path.Combine(_cacheDirectory, "temp");
+        }
+
+        /// <summary>
+        /// Get the path of the "cache.zip" archive located inside the cache directory.
+        /// </summary>
+        /// <returns>The path to the cache archive file</returns>
+        private string GetArchivePath()
+        {
+            return Path.Combine(_cacheDirectory, "cache.zip");
+        }
+
+        /// <summary>
+        /// Get the path of the "cache.info" manifest located inside the cache directory.
+        /// </summary>
+        /// <returns>The path to the cache manifest file</returns>
+        private string GetManifestPath()
+        {
+            return Path.Combine(_cacheDirectory, "cache.info");
+        }
+
+        /// <summary>
+        /// Get the path of the temporary file used to store the cached data with the given hash.
+        /// </summary>
+        /// <param name="key">The hash of the cached data</param>
+        /// <returns>The path inside the temp directory, named after the string form of the hash</returns>
+        private string GenCacheTempFilePath(Hash128 key)
+        {
+            string fileName = key.ToString();
+
+            return Path.Combine(GetCacheTempDataPath(), fileName);
+        }
+
+        /// <summary>
+        /// Create a new cache collection, load its manifest from disk and start its file writer worker.
+        /// </summary>
+        /// <param name="baseCacheDirectory">The directory of the shader cache</param>
+        /// <param name="hashType">The hash type of the shader cache (only XxHash128 is accepted)</param>
+        /// <param name="graphicsApi">The graphics api of the shader cache</param>
+        /// <param name="shaderProvider">The shader provider name of the shader cache</param>
+        /// <param name="cacheName">The name of the cache</param>
+        /// <param name="version">The version of the cache</param>
+        /// <exception cref="NotImplementedException">Thrown when the hash type is not XxHash128</exception>
+        public CacheCollection(string baseCacheDirectory, CacheHashType hashType, CacheGraphicsApi graphicsApi, string shaderProvider, string cacheName, ulong version)
+        {
+            if (hashType != CacheHashType.XxHash128)
+            {
+                throw new NotImplementedException($"{hashType}");
+            }
+
+            _version = version;
+            _hashType = hashType;
+            _graphicsApi = graphicsApi;
+            _hashTable = new HashSet<Hash128>();
+            _cacheDirectory = GenerateCachePath(baseCacheDirectory, graphicsApi, shaderProvider, cacheName);
+
+            // The initial load is done synchronously, before the worker exists.
+            Load();
+
+            string workerName = $"CacheCollection.Worker.{cacheName}";
+
+            _fileWriterWorkerQueue = new AsyncWorkQueue<CacheFileOperationTask>(HandleCacheTask, workerName);
+        }
+
+        /// <summary>
+        /// Load the cache manifest file and recreate it if invalid.
+        /// </summary>
+        /// <remarks>
+        /// The cache is considered invalid when the cache directory or the manifest is missing,
+        /// when the manifest is too small to contain a header, or when the header validation fails.
+        /// An invalid cache directory is deleted and recreated with an empty manifest.
+        /// In all cases, this ends by flushing the temporary files to the archive and opening the archive.
+        /// </remarks>
+        private void Load()
+        {
+            // Anything that cannot be positively validated is treated as invalid.
+            bool isInvalid = true;
+
+            if (Directory.Exists(_cacheDirectory))
+            {
+                string manifestPath = GetManifestPath();
+
+                if (File.Exists(manifestPath))
+                {
+                    Memory<byte> rawManifest = File.ReadAllBytes(manifestPath);
+
+                    // TryRead fails when the file is too small to contain a header (truncated manifest),
+                    // the cache stays flagged as invalid in that case.
+                    if (MemoryMarshal.TryRead(rawManifest.Span, out CacheManifestHeader manifestHeader))
+                    {
+                        Memory<byte> hashTableRaw = rawManifest.Slice(Unsafe.SizeOf<CacheManifestHeader>());
+
+                        isInvalid = !manifestHeader.IsValid(_version, _graphicsApi, _hashType, hashTableRaw.Span);
+
+                        if (!isInvalid)
+                        {
+                            ReadOnlySpan<Hash128> hashTable = MemoryMarshal.Cast<byte, Hash128>(hashTableRaw.Span);
+
+                            foreach (Hash128 hash in hashTable)
+                            {
+                                _hashTable.Add(hash);
+                            }
+                        }
+                    }
+                }
+            }
+
+            if (isInvalid)
+            {
+                Logger.Warning?.Print(LogClass.Gpu, $"Shader collection \"{_cacheDirectory}\" got invalidated, cache will need to be rebuilt.");
+
+                if (Directory.Exists(_cacheDirectory))
+                {
+                    Directory.Delete(_cacheDirectory, true);
+                }
+
+                Directory.CreateDirectory(_cacheDirectory);
+
+                SaveManifest();
+            }
+
+            FlushToArchive();
+        }
+
+        /// <summary>
+        /// Drop the given hashes from the hash table, then save the manifest.
+        /// </summary>
+        /// <param name="entries">Entries to remove from the manifest</param>
+        public void RemoveManifestEntries(HashSet<Hash128> entries)
+        {
+            lock (_hashTable)
+            {
+                foreach (Hash128 staleHash in entries)
+                {
+                    _hashTable.Remove(staleHash);
+                }
+
+                // The manifest is written while the lock is still held.
+                SaveManifest();
+            }
+        }
+
+        /// <summary>
+        /// Ask the worker to flush temporary files to the archive.
+        /// This only queues the operation and does not wait for it.
+        /// </summary>
+        public void FlushToArchiveAsync()
+        {
+            CacheFileOperationTask flushTask = new CacheFileOperationTask
+            {
+                Type = CacheFileOperation.FlushToArchive
+            };
+
+            _fileWriterWorkerQueue.Add(flushTask);
+        }
+
+        /// <summary>
+        /// Block until the worker reached a synchronization task queued by this call.
+        /// </summary>
+        public void Synchronize()
+        {
+            using ManualResetEvent completionEvent = new ManualResetEvent(false);
+
+            CacheFileOperationTask syncTask = new CacheFileOperationTask
+            {
+                Type = CacheFileOperation.Synchronize,
+                Data = completionEvent
+            };
+
+            _fileWriterWorkerQueue.Add(syncTask);
+
+            // The worker signals the event when it handles the task.
+            completionEvent.WaitOne();
+        }
+
+        /// <summary>
+        /// Move temporary files to the archive, then reopen the archive for reading.
+        /// </summary>
+        /// <remarks>This dispose <see cref="_cacheArchive"/> if not null and reinstantiate it.</remarks>
+        private void FlushToArchive()
+        {
+            EnsureArchiveUpToDate();
+
+            string archivePath = GetArchivePath();
+
+            // Open the zip in readonly to avoid anyone modifying/corrupting it during normal operations.
+            _cacheArchive = ZipFile.Open(archivePath, ZipArchiveMode.Read);
+        }
+
+        /// <summary>
+        /// Save temporary files not in archive.
+        /// </summary>
+        /// <remarks>
+        /// This dispose <see cref="_cacheArchive"/> if not null, it is left null when this returns.
+        /// Temporary files are only deleted once the archive has been written back to disk.
+        /// </remarks>
+        public void EnsureArchiveUpToDate()
+        {
+            // First close previous opened instance if found.
+            if (_cacheArchive != null)
+            {
+                _cacheArchive.Dispose();
+            }
+
+            string archivePath = GetArchivePath();
+
+            // Open the zip in read/write.
+            _cacheArchive = ZipFile.Open(archivePath, ZipArchiveMode.Update);
+
+            Logger.Info?.Print(LogClass.Gpu, $"Updating cache collection archive {archivePath}...");
+
+            // Update the content of the zip.
+            lock (_hashTable)
+            {
+                List<string> archivedTempFiles = new List<string>();
+
+                foreach (Hash128 hash in _hashTable)
+                {
+                    string cacheTempFilePath = GenCacheTempFilePath(hash);
+
+                    if (File.Exists(cacheTempFilePath))
+                    {
+                        string cacheHash = $"{hash}";
+
+                        // Replace any previous version of the entry.
+                        ZipArchiveEntry entry = _cacheArchive.GetEntry(cacheHash);
+
+                        entry?.Delete();
+
+                        _cacheArchive.CreateEntryFromFile(cacheTempFilePath, cacheHash);
+                        archivedTempFiles.Add(cacheTempFilePath);
+                    }
+                }
+
+                // Close the instance to force a flush.
+                _cacheArchive.Dispose();
+                _cacheArchive = null;
+
+                // An archive opened in update mode is only written to disk when disposed,
+                // so the temporary files must be kept until this point to not lose them if the flush fails.
+                foreach (string archivedTempFile in archivedTempFiles)
+                {
+                    File.Delete(archivedTempFile);
+                }
+
+                string cacheTempDataPath = GetCacheTempDataPath();
+
+                // Create the cache data path if missing.
+                if (!Directory.Exists(cacheTempDataPath))
+                {
+                    Directory.CreateDirectory(cacheTempDataPath);
+                }
+            }
+
+            Logger.Info?.Print(LogClass.Gpu, $"Updated cache collection archive {archivePath}.");
+        }
+
+        /// <summary>
+        /// Write the manifest file: a header followed by every hash of the hash table.
+        /// </summary>
+        private void SaveManifest()
+        {
+            CacheManifestHeader header = new CacheManifestHeader(_version, _graphicsApi, _hashType);
+
+            int headerSize = Unsafe.SizeOf<CacheManifestHeader>();
+
+            byte[] manifestData;
+
+            lock (_hashTable)
+            {
+                manifestData = new byte[headerSize + _hashTable.Count * Unsafe.SizeOf<Hash128>()];
+
+                // CacheManifestHeader has the same size as a Hash128, so skipping one element skips the header.
+                Span<Hash128> hashes = MemoryMarshal.Cast<byte, Hash128>(manifestData.AsSpan()).Slice(1);
+
+                int index = 0;
+
+                foreach (Hash128 hash in _hashTable)
+                {
+                    hashes[index] = hash;
+                    index++;
+                }
+            }
+
+            // The checksum covers the hash table only, the header is written afterwards.
+            header.UpdateChecksum(manifestData.AsSpan().Slice(headerSize));
+
+            MemoryMarshal.Write(manifestData, ref header);
+
+            File.WriteAllBytes(GetManifestPath(), manifestData);
+        }
+
+        /// <summary>
+        /// Build the path to the cache directory from the base directory, the graphics api, the shader provider and the cache name.
+        /// </summary>
+        /// <param name="baseCacheDirectory">The base of the cache directory</param>
+        /// <param name="graphicsApi">The graphics api in use</param>
+        /// <param name="shaderProvider">The name of the shader provider in use</param>
+        /// <param name="cacheName">The name of the cache</param>
+        /// <returns>The path to the cache directory</returns>
+        /// <exception cref="NotImplementedException">Thrown when the graphics api has no directory name assigned</exception>
+        private static string GenerateCachePath(string baseCacheDirectory, CacheGraphicsApi graphicsApi, string shaderProvider, string cacheName)
+        {
+            string graphicsApiName;
+
+            switch (graphicsApi)
+            {
+                case CacheGraphicsApi.OpenGL:
+                    graphicsApiName = "opengl";
+                    break;
+                case CacheGraphicsApi.OpenGLES:
+                    graphicsApiName = "opengles";
+                    break;
+                case CacheGraphicsApi.Vulkan:
+                    graphicsApiName = "vulkan";
+                    break;
+                case CacheGraphicsApi.DirectX:
+                    graphicsApiName = "directx";
+                    break;
+                case CacheGraphicsApi.Metal:
+                    graphicsApiName = "metal";
+                    break;
+                case CacheGraphicsApi.Guest:
+                    graphicsApiName = "guest";
+                    break;
+                default:
+                    throw new NotImplementedException(graphicsApi.ToString());
+            }
+
+            return Path.Combine(baseCacheDirectory, graphicsApiName, shaderProvider, cacheName);
+        }
+
+        /// <summary>
+        /// Get a cached file with the given hash, looking in the archive first and in the temporary files second.
+        /// </summary>
+        /// <param name="keyHash">The given hash</param>
+        /// <returns>The cached file if present or null</returns>
+        public byte[] GetValueRaw(ref Hash128 keyHash)
+        {
+            byte[] value = GetValueRawFromArchive(ref keyHash);
+
+            if (value == null)
+            {
+                value = GetValueRawFromFile(ref keyHash);
+            }
+
+            return value;
+        }
+
+        /// <summary>
+        /// Get a cached file with the given hash that is present in the archive.
+        /// </summary>
+        /// <remarks>
+        /// Failures while reading the content of the entry (including a truncated entry) are logged and reported as null.
+        /// </remarks>
+        /// <param name="keyHash">The given hash</param>
+        /// <returns>The cached file if present or null</returns>
+        private byte[] GetValueRawFromArchive(ref Hash128 keyHash)
+        {
+            bool found;
+
+            lock (_hashTable)
+            {
+                found = _hashTable.Contains(keyHash);
+            }
+
+            if (found)
+            {
+                ZipArchiveEntry archiveEntry = _cacheArchive.GetEntry($"{keyHash}");
+
+                if (archiveEntry != null)
+                {
+                    try
+                    {
+                        byte[] result = new byte[archiveEntry.Length];
+
+                        using (Stream archiveStream = archiveEntry.Open())
+                        {
+                            // A single call to Stream.Read is allowed to return less data than requested,
+                            // so keep reading until the whole entry was received.
+                            int totalRead = 0;
+
+                            while (totalRead < result.Length)
+                            {
+                                int read = archiveStream.Read(result, totalRead, result.Length - totalRead);
+
+                                if (read == 0)
+                                {
+                                    throw new EndOfStreamException($"Cache file {keyHash} is truncated ({totalRead} of {result.Length} bytes read)");
+                                }
+
+                                totalRead += read;
+                            }
+
+                            return result;
+                        }
+                    }
+                    catch (Exception e)
+                    {
+                        Logger.Error?.Print(LogClass.Gpu, $"Cannot load cache file {keyHash} from archive");
+                        Logger.Error?.Print(LogClass.Gpu, e.ToString());
+                    }
+                }
+            }
+
+            return null;
+        }
+
+        /// <summary>
+        /// Get a cached file with the given hash from the temporary files.
+        /// </summary>
+        /// <remarks>Read failures are logged and reported as null.</remarks>
+        /// <param name="keyHash">The given hash</param>
+        /// <returns>The cached file if present or null</returns>
+        private byte[] GetValueRawFromFile(ref Hash128 keyHash)
+        {
+            bool isKnown;
+
+            lock (_hashTable)
+            {
+                isKnown = _hashTable.Contains(keyHash);
+            }
+
+            if (!isKnown)
+            {
+                return null;
+            }
+
+            string cacheTempFilePath = GenCacheTempFilePath(keyHash);
+
+            try
+            {
+                return File.ReadAllBytes(cacheTempFilePath);
+            }
+            catch (Exception e)
+            {
+                Logger.Error?.Print(LogClass.Gpu, $"Cannot load cache file at {cacheTempFilePath}");
+                Logger.Error?.Print(LogClass.Gpu, e.ToString());
+            }
+
+            return null;
+        }
+
+        /// <summary>
+        /// Perform an operation taken from the <see cref="_fileWriterWorkerQueue"/>.
+        /// </summary>
+        /// <param name="task">The operation to perform</param>
+        /// <exception cref="NotImplementedException">Thrown when the type of the operation is unknown</exception>
+        private void HandleCacheTask(CacheFileOperationTask task)
+        {
+            CacheFileOperation operation = task.Type;
+
+            if (operation == CacheFileOperation.SaveTempEntry)
+            {
+                SaveTempEntry((CacheFileSaveEntryTaskData)task.Data);
+            }
+            else if (operation == CacheFileOperation.SaveManifest)
+            {
+                SaveManifest();
+            }
+            else if (operation == CacheFileOperation.FlushToArchive)
+            {
+                FlushToArchive();
+            }
+            else if (operation == CacheFileOperation.Synchronize)
+            {
+                // Wake up the thread waiting for the queue to reach this point.
+                ((ManualResetEvent)task.Data).Set();
+            }
+            else
+            {
+                throw new NotImplementedException($"{task.Type}");
+            }
+        }
+
+        /// <summary>
+        /// Write the value of an entry to its temporary file.
+        /// </summary>
+        /// <param name="entry">The entry to save in the temp cache</param>
+        private void SaveTempEntry(CacheFileSaveEntryTaskData entry)
+        {
+            File.WriteAllBytes(GenCacheTempFilePath(entry.Key), entry.Value);
+        }
+
+        /// <summary>
+        /// Add a new value in the cache with a given hash.
+        /// </summary>
+        /// <remarks>
+        /// The hash is registered immediately, the file and the manifest are written later by the worker.
+        /// </remarks>
+        /// <param name="keyHash">The hash to use for the value in the cache</param>
+        /// <param name="value">The value to cache</param>
+        /// <exception cref="InvalidOperationException">Thrown when the hash is already registered in the cache</exception>
+        public void AddValue(ref Hash128 keyHash, byte[] value)
+        {
+            Debug.Assert(value != null);
+
+            // A new entry must not have any data stored for its hash yet.
+            Debug.Assert(GetValueRaw(ref keyHash) == null);
+
+            bool isAlreadyPresent;
+
+            lock (_hashTable)
+            {
+                isAlreadyPresent = !_hashTable.Add(keyHash);
+            }
+
+            if (isAlreadyPresent)
+            {
+                // NOTE: Used for debug
+                File.WriteAllBytes(GenCacheTempFilePath(new Hash128()), value);
+
+                throw new InvalidOperationException($"Cache collision found on {GenCacheTempFilePath(keyHash)}");
+            }
+
+            // Queue file change operations
+            _fileWriterWorkerQueue.Add(new CacheFileOperationTask
+            {
+                Type = CacheFileOperation.SaveTempEntry,
+                Data = new CacheFileSaveEntryTaskData
+                {
+                    Key = keyHash,
+                    Value = value
+                }
+            });
+
+            // Save the manifest changes
+            _fileWriterWorkerQueue.Add(new CacheFileOperationTask
+            {
+                Type = CacheFileOperation.SaveManifest,
+            });
+        }
+
+        /// <summary>
+        /// Queue the write of a new value for the given hash.
+        /// The hash table and the manifest are left untouched.
+        /// </summary>
+        /// <param name="keyHash">The hash to use for the value in the cache</param>
+        /// <param name="value">The value to cache</param>
+        public void ReplaceValue(ref Hash128 keyHash, byte[] value)
+        {
+            Debug.Assert(value != null);
+
+            CacheFileSaveEntryTaskData entryData = new CacheFileSaveEntryTaskData
+            {
+                Key = keyHash,
+                Value = value
+            };
+
+            // Only queue file change operations
+            CacheFileOperationTask saveTask = new CacheFileOperationTask
+            {
+                Type = CacheFileOperation.SaveTempEntry,
+                Data = entryData
+            };
+
+            _fileWriterWorkerQueue.Add(saveTask);
+        }
+
+        /// <summary>
+        /// Release the resources of the cache collection.
+        /// </summary>
+        public void Dispose() => Dispose(true);
+
+        /// <summary>
+        /// Wait for the pending operations, stop the worker and save the temporary files to the archive.
+        /// </summary>
+        /// <param name="disposing">True to perform the cleanup, nothing is done otherwise</param>
+        protected virtual void Dispose(bool disposing)
+        {
+            if (!disposing)
+            {
+                return;
+            }
+
+            // Make sure all operations on _fileWriterWorkerQueue are done.
+            Synchronize();
+
+            _fileWriterWorkerQueue.Dispose();
+
+            // The worker is gone, so the last flush is done on the calling thread.
+            EnsureArchiveUpToDate();
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs
new file mode 100644
index 00000000..d241eb01
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs
@@ -0,0 +1,168 @@
+using Ryujinx.Common;
+using Ryujinx.Common.Configuration;
+using Ryujinx.Graphics.Gpu.Shader.Cache.Definition;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Ryujinx.Graphics.Gpu.Shader.Cache
+{
+    /// <summary>
+    /// Global manager of the shader cache, owning the guest and host program caches of a title.
+    /// </summary>
+    class CacheManager : IDisposable
+    {
+        private readonly CacheGraphicsApi _graphicsApi;
+        private readonly CacheHashType _hashType;
+        private readonly string _shaderProvider;
+
+        /// <summary>
+        /// Cache storing raw Maxwell shaders as programs.
+        /// </summary>
+        private readonly CacheCollection _guestProgramCache;
+
+        /// <summary>
+        /// Cache storing raw host programs.
+        /// </summary>
+        private readonly CacheCollection _hostProgramCache;
+
+        /// <summary>
+        /// Version of the guest shader cache (to increment when the guest cache structure changes).
+        /// </summary>
+        private const ulong GuestCacheVersion = 1;
+
+        /// <summary>
+        /// Create a new cache manager instance, with both caches rooted in the shader cache directory of the title.
+        /// </summary>
+        /// <param name="graphicsApi">The graphics api in use</param>
+        /// <param name="hashType">The hash type in use for the cache</param>
+        /// <param name="shaderProvider">The name of the codegen provider</param>
+        /// <param name="titleId">The guest application title ID</param>
+        /// <param name="shaderCodeGenVersion">Version of the codegen</param>
+        public CacheManager(CacheGraphicsApi graphicsApi, CacheHashType hashType, string shaderProvider, string titleId, ulong shaderCodeGenVersion)
+        {
+            _graphicsApi = graphicsApi;
+            _hashType = hashType;
+            _shaderProvider = shaderProvider;
+
+            string baseCacheDirectory = Path.Combine(AppDataManager.GamesDirPath, titleId, "cache", "shader");
+
+            _guestProgramCache = new CacheCollection(baseCacheDirectory, _hashType, CacheGraphicsApi.Guest, "", "program", GuestCacheVersion);
+            _hostProgramCache = new CacheCollection(baseCacheDirectory, _hashType, _graphicsApi, _shaderProvider, "host", shaderCodeGenVersion);
+        }
+
+        /// <summary>
+        /// Remove entries from the manifest of both the guest and the host cache.
+        /// </summary>
+        /// <param name="entries">Entries to remove from the manifest of all caches</param>
+        public void RemoveManifestEntries(HashSet<Hash128> entries)
+        {
+            _guestProgramCache.RemoveManifestEntries(entries);
+            _hostProgramCache.RemoveManifestEntries(entries);
+        }
+
+        /// <summary>
+        /// Queue a task to flush temporary files to the archives.
+        /// </summary>
+        public void FlushToArchive()
+        {
+            _guestProgramCache.FlushToArchiveAsync();
+            _hostProgramCache.FlushToArchiveAsync();
+        }
+
+        /// <summary>
+        /// Wait for all tasks queued before this point to be done, on both caches.
+        /// </summary>
+        public void Synchronize()
+        {
+            _guestProgramCache.Synchronize();
+            _hostProgramCache.Synchronize();
+        }
+
+        /// <summary>
+        /// Computes the hash of some data using the current cache hashing algorithm.
+        /// </summary>
+        /// <remarks>Always uses XXHash128, the only <see cref="CacheHashType"/> currently defined.</remarks>
+        /// <param name="data">Some data to generate a hash for.</param>
+        /// <returns>The hash of some data using the current hashing algorithm of the cache</returns>
+        public Hash128 ComputeHash(ReadOnlySpan<byte> data) => XXHash128.ComputeHash(data);
+
+        /// <summary>
+        /// Save a shader program not present in the program cache, adding it to both the guest and the host cache.
+        /// </summary>
+        /// <param name="programCodeHash">Target program code hash</param>
+        /// <param name="guestProgram">Guest program raw data</param>
+        /// <param name="hostProgram">Host program raw data</param>
+        public void SaveProgram(ref Hash128 programCodeHash, byte[] guestProgram, byte[] hostProgram)
+        {
+            _guestProgramCache.AddValue(ref programCodeHash, guestProgram);
+            _hostProgramCache.AddValue(ref programCodeHash, hostProgram);
+        }
+
+        /// <summary>
+        /// Add a host shader program not present in the program cache.
+        /// </summary>
+        /// <param name="programCodeHash">Target program code hash</param>
+        /// <param name="data">Host program raw data</param>
+        public void AddHostProgram(ref Hash128 programCodeHash, byte[] data)
+        {
+            _hostProgramCache.AddValue(ref programCodeHash, data);
+        }
+
+        /// <summary>
+        /// Replace a host shader program present in the program cache.
+        /// </summary>
+        /// <param name="programCodeHash">Target program code hash</param>
+        /// <param name="data">Host program raw data</param>
+        public void ReplaceHostProgram(ref Hash128 programCodeHash, byte[] data)
+        {
+            _hostProgramCache.ReplaceValue(ref programCodeHash, data);
+        }
+
+        /// <summary>
+        /// Get all guest program hashes.
+        /// </summary>
+        /// <returns>All guest program hashes</returns>
+        public ReadOnlySpan<Hash128> GetGuestProgramList() => _guestProgramCache.HashTable;
+
+        /// <summary>
+        /// Get a host program by hash.
+        /// </summary>
+        /// <param name="hash">The given hash</param>
+        /// <returns>The host program if present or null</returns>
+        public byte[] GetHostProgramByHash(ref Hash128 hash) => _hostProgramCache.GetValueRaw(ref hash);
+
+        /// <summary>
+        /// Get a guest program by hash.
+        /// </summary>
+        /// <param name="hash">The given hash</param>
+        /// <returns>The guest program if present or null</returns>
+        public byte[] GetGuestProgramByHash(ref Hash128 hash) => _guestProgramCache.GetValueRaw(ref hash);
+
+        /// <summary>
+        /// Dispose the guest and host program caches.
+        /// </summary>
+        public void Dispose() => Dispose(true);
+
+        /// <summary>
+        /// Dispose the guest and host program caches when called from <see cref="Dispose()"/>.
+        /// </summary>
+        /// <param name="disposing">True when called from <see cref="Dispose()"/></param>
+        protected virtual void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                // The host cache must still be disposed when disposing the guest cache throws,
+                // otherwise it would never be given the chance to release what it holds.
+                try
+                {
+                    _guestProgramCache.Dispose();
+                }
+                finally
+                {
+                    _hostProgramCache.Dispose();
+                }
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/CacheGraphicsApi.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/CacheGraphicsApi.cs
new file mode 100644
index 00000000..9f8b5c39
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/CacheGraphicsApi.cs
@@ -0,0 +1,38 @@
+namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
+{
+    /// <summary>
+    /// Graphics API type accepted by the shader cache, stored as a single byte.
+    /// </summary>
+    enum CacheGraphicsApi : byte
+    {
+        /// <summary>
+        /// OpenGL Core (implicit value 0).
+        /// </summary>
+        OpenGL,
+
+        /// <summary>
+        /// OpenGL ES (implicit value 1).
+        /// </summary>
+        OpenGLES,
+
+        /// <summary>
+        /// Vulkan (implicit value 2).
+        /// </summary>
+        Vulkan,
+
+        /// <summary>
+        /// DirectX (implicit value 3).
+        /// </summary>
+        DirectX,
+
+        /// <summary>
+        /// Metal (implicit value 4).
+        /// </summary>
+        Metal,
+
+        /// <summary>
+        /// Guest (implicit value 5), used to cache the raw shader programs of games.
+        /// </summary>
+        Guest
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/CacheHashType.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/CacheHashType.cs
new file mode 100644
index 00000000..e4ebe416
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/CacheHashType.cs
@@ -0,0 +1,13 @@
+namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
+{
+    /// <summary>
+    /// Hash algorithm accepted by the shader cache, stored as a single byte.
+    /// </summary>
+    enum CacheHashType : byte
+    {
+        /// <summary>
+        /// xxHash128 (implicit value 0), currently the only algorithm defined.
+        /// </summary>
+        XxHash128
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/CacheManifestHeader.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/CacheManifestHeader.cs
new file mode 100644
index 00000000..3f198dca
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/CacheManifestHeader.cs
@@ -0,0 +1,97 @@
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
+{
+    /// <summary>
+    /// Header placed at the start of the shader cache manifest.
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential, Pack = 1, Size = 0x10)]
+    struct CacheManifestHeader
+    {
+        /// <summary>
+        /// The version of the cache.
+        /// </summary>
+        public ulong Version;
+
+        /// <summary>
+        /// The graphics api used for this cache.
+        /// </summary>
+        public CacheGraphicsApi GraphicsApi;
+
+        /// <summary>
+        /// The hash type used for this cache.
+        /// </summary>
+        public CacheHashType HashType;
+
+        /// <summary>
+        /// CRC-16 checksum over the data in the file.
+        /// </summary>
+        public ushort TableChecksum;
+
+        /// <summary>
+        /// Construct a new cache manifest header with a checksum of zero.
+        /// </summary>
+        /// <param name="version">The version of the cache</param>
+        /// <param name="graphicsApi">The graphics api used for this cache</param>
+        /// <param name="hashType">The hash type used for this cache</param>
+        public CacheManifestHeader(ulong version, CacheGraphicsApi graphicsApi, CacheHashType hashType)
+        {
+            TableChecksum = 0;
+            Version = version;
+            GraphicsApi = graphicsApi;
+            HashType = hashType;
+        }
+
+        /// <summary>
+        /// Recompute the checksum stored in the header.
+        /// </summary>
+        /// <param name="data">The data to perform the checksum on</param>
+        public void UpdateChecksum(ReadOnlySpan<byte> data) => TableChecksum = CalculateCrc16(data);
+
+        /// <summary>
+        /// Calculate a CRC-16 (polynomial 0x1021, initial value 0, most significant bit first) over data.
+        /// </summary>
+        /// <param name="data">The data to perform the CRC-16 on</param>
+        /// <returns>A CRC-16 over data</returns>
+        private static ushort CalculateCrc16(ReadOnlySpan<byte> data)
+        {
+            const int Polynomial = 0x1021;
+
+            int crc = 0;
+
+            foreach (byte value in data)
+            {
+                crc ^= value << 8;
+
+                for (int bit = 0; bit < 8; bit++)
+                {
+                    crc <<= 1;
+
+                    if ((crc & 0x10000) != 0)
+                    {
+                        crc = (crc ^ Polynomial) & 0xFFFF;
+                    }
+                }
+            }
+
+            return (ushort)crc;
+        }
+
+        /// <summary>
+        /// Check that the header matches the expected parameters and the checksum of the data.
+        /// </summary>
+        /// <param name="version">The target version in use</param>
+        /// <param name="graphicsApi">The target graphics api in use</param>
+        /// <param name="hashType">The target hash type in use</param>
+        /// <param name="data">The data after this header</param>
+        /// <returns>True if the header is valid</returns>
+        public bool IsValid(ulong version, CacheGraphicsApi graphicsApi, CacheHashType hashType, ReadOnlySpan<byte> data)
+        {
+            return Version == version &&
+                   GraphicsApi == graphicsApi &&
+                   HashType == hashType &&
+                   TableChecksum == CalculateCrc16(data);
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestGpuAccessorHeader.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestGpuAccessorHeader.cs
new file mode 100644
index 00000000..396b0443
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestGpuAccessorHeader.cs
@@ -0,0 +1,62 @@
+using Ryujinx.Graphics.Shader;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
+{
+    /// <summary>
+    /// Header of a cached guest GPU accessor (sequential layout, 1-byte packing, declared size of 0x20 bytes).
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential, Size = 0x20, Pack = 1)]
+    struct GuestGpuAccessorHeader
+    {
+        /// <summary>
+        /// The count of texture descriptors.
+        /// </summary>
+        public int TextureDescriptorCount;
+
+        /// <summary>
+        /// Local size X for compute shaders.
+        /// </summary>
+        public int ComputeLocalSizeX;
+
+        /// <summary>
+        /// Local size Y for compute shaders.
+        /// </summary>
+        public int ComputeLocalSizeY;
+
+        /// <summary>
+        /// Local size Z for compute shaders.
+        /// </summary>
+        public int ComputeLocalSizeZ;
+
+        /// <summary>
+        /// Local memory size in bytes for compute shaders.
+        /// </summary>
+        public int ComputeLocalMemorySize;
+
+        /// <summary>
+        /// Shared memory size in bytes for compute shaders.
+        /// </summary>
+        public int ComputeSharedMemorySize;
+
+        /// <summary>
+        /// Unused/reserved.
+        /// </summary>
+        public int Reserved1;
+
+        /// <summary>
+        /// Current primitive topology for geometry shaders.
+        /// </summary>
+        public InputTopology PrimitiveTopology;
+
+        /// <summary>
+        /// Unused/reserved.
+        /// </summary>
+        public ushort Reserved2;
+
+        /// <summary>
+        /// Unused/reserved.
+        /// </summary>
+        public byte Reserved3;
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheEntry.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheEntry.cs
new file mode 100644
index 00000000..45a442e2
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheEntry.cs
@@ -0,0 +1,88 @@
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
+{
+    /// <summary>
+    /// A single cached shader entry of a guest shader program.
+    /// </summary>
+    class GuestShaderCacheEntry
+    {
+        /// <summary>
+        /// Header describing this cached shader entry.
+        /// </summary>
+        public GuestShaderCacheEntryHeader Header { get; }
+
+        /// <summary>
+        /// Raw code of this shader.
+        /// </summary>
+        /// <remarks>If a Vertex A is present, this also contains the code 2 section.</remarks>
+        public byte[] Code { get; }
+
+        /// <summary>
+        /// Texture descriptors used by this shader, keyed by texture handle.
+        /// </summary>
+        public Dictionary<int, GuestTextureDescriptor> TextureDescriptors { get; }
+
+        /// <summary>
+        /// Create a new instance of <see cref="GuestShaderCacheEntry"/> without any texture descriptor.
+        /// </summary>
+        /// <param name="header">The header of the cached shader entry</param>
+        /// <param name="code">The code of this shader</param>
+        private GuestShaderCacheEntry(GuestShaderCacheEntryHeader header, byte[] code)
+        {
+            TextureDescriptors = new Dictionary<int, GuestTextureDescriptor>();
+            Header = header;
+            Code = code;
+        }
+
+        /// <summary>
+        /// Parse a raw cached user shader program into an array of shader cache entries.
+        /// </summary>
+        /// <param name="data">The raw cached user shader program, advanced past everything that was parsed</param>
+        /// <param name="fileHeader">The user shader program header</param>
+        /// <returns>An array of shader cache entries, one slot per entry header</returns>
+        public static GuestShaderCacheEntry[] Parse(ref ReadOnlySpan<byte> data, out GuestShaderCacheHeader fileHeader)
+        {
+            fileHeader = MemoryMarshal.Read<GuestShaderCacheHeader>(data);
+            data = data.Slice(Unsafe.SizeOf<GuestShaderCacheHeader>());
+
+            int entryHeadersSize = fileHeader.Count * Unsafe.SizeOf<GuestShaderCacheEntryHeader>();
+            ReadOnlySpan<GuestShaderCacheEntryHeader> entryHeaders = MemoryMarshal.Cast<byte, GuestShaderCacheEntryHeader>(data.Slice(0, entryHeadersSize));
+            data = data.Slice(entryHeadersSize);
+
+            GuestShaderCacheEntry[] entries = new GuestShaderCacheEntry[fileHeader.Count];
+
+            for (int index = 0; index < entries.Length; index++)
+            {
+                GuestShaderCacheEntryHeader header = entryHeaders[index];
+
+                // Entries without any code are skipped and stay null in the returned array.
+                if (header.Size == 0 && header.SizeA == 0)
+                {
+                    continue;
+                }
+
+                int codeSize = header.Size + header.SizeA;
+                byte[] code = data.Slice(0, codeSize).ToArray();
+                data = data.Slice(codeSize);
+
+                GuestShaderCacheEntry entry = new GuestShaderCacheEntry(header, code);
+                entries[index] = entry;
+
+                int descriptorsSize = header.GpuAccessorHeader.TextureDescriptorCount * Unsafe.SizeOf<GuestTextureDescriptor>();
+
+                foreach (GuestTextureDescriptor descriptor in MemoryMarshal.Cast<byte, GuestTextureDescriptor>(data.Slice(0, descriptorsSize)))
+                {
+                    entry.TextureDescriptors.Add((int)descriptor.Handle, descriptor);
+                }
+
+                data = data.Slice(descriptorsSize);
+            }
+
+            return entries;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheEntryHeader.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheEntryHeader.cs
new file mode 100644
index 00000000..6d5bb28d
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheEntryHeader.cs
@@ -0,0 +1,67 @@
+using Ryujinx.Graphics.Shader;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
+{
+    /// <summary>
+    /// The header of a guest shader entry in a guest shader program (1-byte packing, declared size of 0x30 bytes).
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential, Pack = 0x1, Size = 0x30)]
+    struct GuestShaderCacheEntryHeader
+    {
+        /// <summary>
+        /// The stage of this shader.
+        /// </summary>
+        public ShaderStage Stage;
+
+        /// <summary>
+        /// Unused/reserved.
+        /// </summary>
+        public byte Reserved1;
+
+        /// <summary>
+        /// Unused/reserved.
+        /// </summary>
+        public byte Reserved2;
+
+        /// <summary>
+        /// Unused/reserved.
+        /// </summary>
+        public byte Reserved3;
+
+        /// <summary>
+        /// The size of the code section.
+        /// </summary>
+        public int Size;
+
+        /// <summary>
+        /// The size of the code2 section if present (Vertex A).
+        /// </summary>
+        public int SizeA;
+
+        /// <summary>
+        /// Unused/reserved.
+        /// </summary>
+        public int Reserved4;
+
+        /// <summary>
+        /// The header of the cached GPU accessor.
+        /// </summary>
+        public GuestGpuAccessorHeader GpuAccessorHeader;
+
+        /// <summary>
+        /// Create a new guest shader entry header; fields not set here keep the defaults given by this().
+        /// </summary>
+        /// <param name="stage">The stage of this shader</param>
+        /// <param name="size">The size of the code section</param>
+        /// <param name="sizeA">The size of the code2 section if present (Vertex A)</param>
+        /// <param name="gpuAccessorHeader">The header of the cached gpu accessor</param>
+        public GuestShaderCacheEntryHeader(ShaderStage stage, int size, int sizeA, GuestGpuAccessorHeader gpuAccessorHeader) : this()
+        {
+            Stage = stage;
+            Size  = size;
+            SizeA = sizeA;
+            GpuAccessorHeader = gpuAccessorHeader;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheHeader.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheHeader.cs
new file mode 100644
index 00000000..700be47d
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheHeader.cs
@@ -0,0 +1,42 @@
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
+{
+    /// <summary>
+    /// The header of a shader program in the guest cache (1-byte packing, declared size of 0x10 bytes).
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential, Pack = 0x1, Size = 0x10)]
+    struct GuestShaderCacheHeader
+    {
+        /// <summary>
+        /// The count of shaders defining this program.
+        /// </summary>
+        public byte Count;
+
+        /// <summary>
+        /// The count of transform feedback data used in this program.
+        /// </summary>
+        public byte TransformFeedbackCount;
+
+        /// <summary>
+        /// Unused/reserved.
+        /// </summary>
+        public ushort Reserved1;
+
+        /// <summary>
+        /// Unused/reserved.
+        /// </summary>
+        public ulong Reserved2;
+
+        /// <summary>
+        /// Create a new guest shader cache header; fields not set here keep the defaults given by this().
+        /// </summary>
+        /// <param name="count">The count of shaders defining this program</param>
+        /// <param name="transformFeedbackCount">The count of transform feedback data used in this program</param>
+        public GuestShaderCacheHeader(byte count, byte transformFeedbackCount) : this()
+        {
+            Count = count;
+            TransformFeedbackCount = transformFeedbackCount;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheTransformFeedbackHeader.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheTransformFeedbackHeader.cs
new file mode 100644
index 00000000..18cfdf55
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheTransformFeedbackHeader.cs
@@ -0,0 +1,38 @@
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
+{
+    /// <summary>
+    /// Header for transform feedback (1-byte packing, declared size of 0x10 bytes).
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential, Pack = 1, Size = 0x10)]
+    struct GuestShaderCacheTransformFeedbackHeader
+    {
+        /// <summary>
+        /// The buffer index of the transform feedback.
+        /// </summary>
+        public int BufferIndex;
+
+        /// <summary>
+        /// The stride of the transform feedback.
+        /// </summary>
+        public int Stride;
+
+        /// <summary>
+        /// The length of the varying location buffer of the transform feedback.
+        /// </summary>
+        public int VaryingLocationsLength;
+
+        /// <summary>Reserved/unused.</summary>
+        public int Reserved1;
+
+        /// <summary>Create a new transform feedback header; fields not set here keep the defaults given by this().</summary>
+        /// <remarks>Each argument is stored in the field of the same name.</remarks>
+        public GuestShaderCacheTransformFeedbackHeader(int bufferIndex, int stride, int varyingLocationsLength) : this()
+        {
+            BufferIndex = bufferIndex;
+            Stride = stride;
+            VaryingLocationsLength = varyingLocationsLength;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestTextureDescriptor.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestTextureDescriptor.cs
new file mode 100644
index 00000000..7c73ef7b
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestTextureDescriptor.cs
@@ -0,0 +1,15 @@
+using Ryujinx.Graphics.Gpu.Image;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
+{
+    /// <summary>
+    /// Mostly identical to <see cref="TextureDescriptor"/>, but we don't store the address of the texture and store its handle instead.
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential, Size = 0x20, Pack = 1)]
+    struct GuestTextureDescriptor
+    {
+        public uint Handle;
+        internal TextureDescriptor Descriptor;
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntry.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntry.cs
new file mode 100644
index 00000000..f592919f
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntry.cs
@@ -0,0 +1,210 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Shader;
+using System;
+using System.IO;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
+{
+    /// <summary>
+    /// Host shader entry holding the binding information of a cached shader.
+    /// </summary>
+    class HostShaderCacheEntry
+    {
+        /// <summary>
+        /// Header describing this cached shader entry.
+        /// </summary>
+        public HostShaderCacheEntryHeader Header { get; }
+
+        /// <summary>
+        /// Constant buffer descriptors of the cached shader.
+        /// </summary>
+        public BufferDescriptor[] CBuffers { get; }
+
+        /// <summary>
+        /// Storage buffer descriptors of the cached shader.
+        /// </summary>
+        public BufferDescriptor[] SBuffers { get; }
+
+        /// <summary>
+        /// Texture descriptors of the cached shader.
+        /// </summary>
+        public TextureDescriptor[] Textures { get; }
+
+        /// <summary>
+        /// Image descriptors of the cached shader.
+        /// </summary>
+        public TextureDescriptor[] Images { get; }
+
+        /// <summary>
+        /// Create a new instance of <see cref="HostShaderCacheEntry"/> from already parsed data.
+        /// </summary>
+        /// <param name="header">The header of the cached shader entry</param>
+        /// <param name="cBuffers">Cached constant buffers</param>
+        /// <param name="sBuffers">Cached storage buffers</param>
+        /// <param name="textures">Cached texture descriptors</param>
+        /// <param name="images">Cached image descriptors</param>
+        private HostShaderCacheEntry(
+            HostShaderCacheEntryHeader header,
+            BufferDescriptor[] cBuffers,
+            BufferDescriptor[] sBuffers,
+            TextureDescriptor[] textures,
+            TextureDescriptor[] images)
+        {
+            Images = images;
+            Textures = textures;
+            SBuffers = sBuffers;
+            CBuffers = cBuffers;
+            Header = header;
+        }
+
+        /// <summary>
+        /// Create an entry with a default header and no descriptors, used for missing shader code holders.
+        /// </summary>
+        private HostShaderCacheEntry()
+        {
+            Images = new TextureDescriptor[0];
+            Textures = new TextureDescriptor[0];
+            SBuffers = new BufferDescriptor[0];
+            CBuffers = new BufferDescriptor[0];
+            Header = new HostShaderCacheEntryHeader();
+        }
+
+        /// <summary>
+        /// Create an entry holding copies of the descriptors of a translated shader.
+        /// </summary>
+        /// <param name="programInfo">The shader program information to take the descriptors from</param>
+        private HostShaderCacheEntry(ShaderProgramInfo programInfo)
+        {
+            Header = new HostShaderCacheEntryHeader(programInfo.CBuffers.Count,
+                                                    programInfo.SBuffers.Count,
+                                                    programInfo.Textures.Count,
+                                                    programInfo.Images.Count,
+                                                    programInfo.UsesInstanceId);
+            CBuffers = programInfo.CBuffers.ToArray();
+            SBuffers = programInfo.SBuffers.ToArray();
+            Textures = programInfo.Textures.ToArray();
+            Images = programInfo.Images.ToArray();
+        }
+
+        /// <summary>
+        /// Build a <see cref="ShaderProgramInfo"/> out of the descriptors of this entry.
+        /// </summary>
+        /// <returns>A new <see cref="ShaderProgramInfo"/> from this instance</returns>
+        internal ShaderProgramInfo ToShaderProgramInfo()
+        {
+            return new ShaderProgramInfo(CBuffers, SBuffers, Textures, Images, Header.UsesInstanceId);
+        }
+
+        /// <summary>
+        /// Parse a raw cached host shader into an array of shader cache entries.
+        /// </summary>
+        /// <param name="data">The raw cached host shader</param>
+        /// <param name="programCode">The host shader program</param>
+        /// <returns>An array of shader cache entries, with null slots for entries not in use</returns>
+        internal static HostShaderCacheEntry[] Parse(ReadOnlySpan<byte> data, out ReadOnlySpan<byte> programCode)
+        {
+            HostShaderCacheHeader fileHeader = MemoryMarshal.Read<HostShaderCacheHeader>(data);
+            data = data.Slice(Unsafe.SizeOf<HostShaderCacheHeader>());
+
+            // All the entry headers come first, right after the file header.
+            int entryHeadersSize = fileHeader.Count * Unsafe.SizeOf<HostShaderCacheEntryHeader>();
+            ReadOnlySpan<HostShaderCacheEntryHeader> entryHeaders = MemoryMarshal.Cast<byte, HostShaderCacheEntryHeader>(data.Slice(0, entryHeadersSize));
+            data = data.Slice(entryHeadersSize);
+
+            HostShaderCacheEntry[] entries = new HostShaderCacheEntry[fileHeader.Count];
+
+            for (int index = 0; index < entries.Length; index++)
+            {
+                HostShaderCacheEntryHeader entryHeader = entryHeaders[index];
+
+                if (!entryHeader.InUse)
+                {
+                    continue;
+                }
+
+                // Descriptors are read in order: constant buffers, storage buffers, textures, images.
+                int cBuffersSize = entryHeader.CBuffersCount * Unsafe.SizeOf<BufferDescriptor>();
+                ReadOnlySpan<BufferDescriptor> cBuffers = MemoryMarshal.Cast<byte, BufferDescriptor>(data.Slice(0, cBuffersSize));
+                data = data.Slice(cBuffersSize);
+
+                int sBuffersSize = entryHeader.SBuffersCount * Unsafe.SizeOf<BufferDescriptor>();
+                ReadOnlySpan<BufferDescriptor> sBuffers = MemoryMarshal.Cast<byte, BufferDescriptor>(data.Slice(0, sBuffersSize));
+                data = data.Slice(sBuffersSize);
+
+                int texturesSize = entryHeader.TexturesCount * Unsafe.SizeOf<TextureDescriptor>();
+                ReadOnlySpan<TextureDescriptor> textures = MemoryMarshal.Cast<byte, TextureDescriptor>(data.Slice(0, texturesSize));
+                data = data.Slice(texturesSize);
+
+                int imagesSize = entryHeader.ImagesCount * Unsafe.SizeOf<TextureDescriptor>();
+                ReadOnlySpan<TextureDescriptor> images = MemoryMarshal.Cast<byte, TextureDescriptor>(data.Slice(0, imagesSize));
+                data = data.Slice(imagesSize);
+
+                entries[index] = new HostShaderCacheEntry(entryHeader, cBuffers.ToArray(), sBuffers.ToArray(), textures.ToArray(), images.ToArray());
+            }
+
+            programCode = data.Slice(0, fileHeader.CodeSize);
+
+            return entries;
+        }
+
+        /// <summary>
+        /// Serialize a host shader program and its binding information into a new host shader cache file.
+        /// </summary>
+        /// <param name="programCode">The host shader program</param>
+        /// <param name="codeHolders">The shaders code holder</param>
+        /// <returns>Raw data of a new host shader cache file</returns>
+        internal static byte[] Create(ReadOnlySpan<byte> programCode, ShaderCodeHolder[] codeHolders)
+        {
+            HostShaderCacheHeader fileHeader = new HostShaderCacheHeader((byte)codeHolders.Length, programCode.Length);
+
+            HostShaderCacheEntry[] entries = new HostShaderCacheEntry[codeHolders.Length];
+
+            for (int index = 0; index < codeHolders.Length; index++)
+            {
+                entries[index] = codeHolders[index] == null ? new HostShaderCacheEntry() : new HostShaderCacheEntry(codeHolders[index].Info);
+            }
+
+            using (MemoryStream output = new MemoryStream())
+            {
+                BinaryWriter writer = new BinaryWriter(output);
+
+                writer.WriteStruct(fileHeader);
+
+                foreach (HostShaderCacheEntry current in entries)
+                {
+                    writer.WriteStruct(current.Header);
+                }
+
+                foreach (HostShaderCacheEntry current in entries)
+                {
+                    foreach (BufferDescriptor descriptor in current.CBuffers)
+                    {
+                        writer.WriteStruct(descriptor);
+                    }
+
+                    foreach (BufferDescriptor descriptor in current.SBuffers)
+                    {
+                        writer.WriteStruct(descriptor);
+                    }
+
+                    foreach (TextureDescriptor descriptor in current.Textures)
+                    {
+                        writer.WriteStruct(descriptor);
+                    }
+
+                    foreach (TextureDescriptor descriptor in current.Images)
+                    {
+                        writer.WriteStruct(descriptor);
+                    }
+                }
+
+                writer.Write(programCode);
+
+                return output.ToArray();
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntryHeader.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntryHeader.cs
new file mode 100644
index 00000000..9b1af8fb
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheEntryHeader.cs
@@ -0,0 +1,67 @@
+using System.Runtime.InteropServices;
+using Ryujinx.Graphics.Shader;
+
+namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
+{
+    /// <summary>
+    /// Per-shader header of a host cache entry, carrying its binding descriptor counts.
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential, Size = 0x14, Pack = 1)]
+    struct HostShaderCacheEntryHeader
+    {
+        /// <summary>
+        /// Number of constant buffer descriptors of this entry.
+        /// </summary>
+        public int CBuffersCount;
+
+        /// <summary>
+        /// Number of storage buffer descriptors of this entry.
+        /// </summary>
+        public int SBuffersCount;
+
+        /// <summary>
+        /// Number of texture descriptors of this entry.
+        /// </summary>
+        public int TexturesCount;
+
+        /// <summary>
+        /// Number of image descriptors of this entry.
+        /// </summary>
+        public int ImagesCount;
+
+        /// <summary>
+        /// True when the shader uses instance id (marshalled as a single byte).
+        /// </summary>
+        [MarshalAs(UnmanagedType.I1)]
+        public bool UsesInstanceId;
+
+        /// <summary>
+        /// True when the entry is in use; a default-initialized header leaves it false.
+        /// </summary>
+        [MarshalAs(UnmanagedType.I1)]
+        public bool InUse;
+
+        /// <summary>
+        /// Reserved, never written by the constructor (stays zero).
+        /// </summary>
+        public short Reserved;
+
+        /// <summary>
+        /// Builds the header of an entry that is in use.
+        /// </summary>
+        /// <param name="cBuffersCount">Number of constant buffer descriptors</param>
+        /// <param name="sBuffersCount">Number of storage buffer descriptors</param>
+        /// <param name="texturesCount">Number of texture descriptors</param>
+        /// <param name="imagesCount">Number of image descriptors</param>
+        /// <param name="usesInstanceId">True if the shader uses instance id</param>
+        public HostShaderCacheEntryHeader(int cBuffersCount, int sBuffersCount, int texturesCount, int imagesCount, bool usesInstanceId) : this()
+        {
+            InUse = true;
+            UsesInstanceId = usesInstanceId;
+            ImagesCount = imagesCount;
+            TexturesCount = texturesCount;
+            SBuffersCount = sBuffersCount;
+            CBuffersCount = cBuffersCount;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheHeader.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheHeader.cs
new file mode 100644
index 00000000..27f216cc
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/HostShaderCacheHeader.cs
@@ -0,0 +1,42 @@
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
+{
+    /// <summary>
+    /// Header of a program stored in the host shader cache (0x10 bytes; the fields use the first 8).
+    /// </summary>
+    [StructLayout(LayoutKind.Sequential, Size = 0x10, Pack = 1)]
+    struct HostShaderCacheHeader
+    {
+        /// <summary>
+        /// Number of shaders that make up this program.
+        /// </summary>
+        public byte Count;
+
+        /// <summary>
+        /// Reserved, left at zero by the constructor.
+        /// </summary>
+        public byte Reserved1;
+
+        /// <summary>
+        /// Reserved, left at zero by the constructor.
+        /// </summary>
+        public ushort Reserved2;
+
+        /// <summary>
+        /// Size of the shader binary.
+        /// </summary>
+        public int CodeSize;
+
+        /// <summary>
+        /// Builds a host shader cache header; reserved fields are zero-initialized.
+        /// </summary>
+        /// <param name="count">Number of shaders that make up this program</param>
+        /// <param name="codeSize">Size of the shader binary</param>
+        public HostShaderCacheHeader(byte count, int codeSize) : this()
+        {
+            CodeSize = codeSize;
+            Count = count;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs
new file mode 100644
index 00000000..5f1458fa
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs
@@ -0,0 +1,154 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Gpu.Shader.Cache.Definition;
+using Ryujinx.Graphics.Shader;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+    /// <summary>
+    /// Represents a GPU state and memory accessor that replays state captured in a shader cache entry.
+    /// </summary>
+    class CachedGpuAccessor : TextureDescriptorCapableGpuAccessor, IGpuAccessor
+    {
+        private readonly GpuContext _context;
+        private readonly ReadOnlyMemory<byte> _data;
+        private readonly GuestGpuAccessorHeader _header;
+        private readonly Dictionary<int, Image.TextureDescriptor> _textureDescriptors;
+
+        /// <summary>
+        /// Creates a new instance of the cached GPU state accessor for shader translation.
+        /// </summary>
+        /// <param name="context">GPU context, used to query host capabilities</param>
+        /// <param name="data">The shader data; addresses passed to the accessor are byte offsets into it</param>
+        /// <param name="header">The cached GPU accessor state (compute sizes, primitive topology)</param>
+        /// <param name="guestTextureDescriptors">The cached texture descriptors, keyed by handle</param>
+        public CachedGpuAccessor(GpuContext context, ReadOnlyMemory<byte> data, GuestGpuAccessorHeader header, Dictionary<int, GuestTextureDescriptor> guestTextureDescriptors)
+        {
+            _context = context;
+            _data = data;
+            _header = header;
+            _textureDescriptors = new Dictionary<int, Image.TextureDescriptor>(guestTextureDescriptors.Count);
+
+            foreach (KeyValuePair<int, GuestTextureDescriptor> guestTextureDescriptor in guestTextureDescriptors)
+            {
+                _textureDescriptors.Add(guestTextureDescriptor.Key, guestTextureDescriptor.Value.Descriptor);
+            }
+        }
+
+        /// <summary>
+        /// Prints a log message at warning level.
+        /// </summary>
+        /// <param name="message">Message to print</param>
+        public void Log(string message)
+        {
+            Logger.Warning?.Print(LogClass.Gpu, $"Shader translator: {message}");
+        }
+
+        /// <summary>
+        /// Reads data from the cached shader data.
+        /// </summary>
+        /// <typeparam name="T">Type of the data to be read</typeparam>
+        /// <param name="address">Byte offset of the data inside the cached shader data</param>
+        /// <returns>Data at the memory location</returns>
+        public override T MemoryRead<T>(ulong address)
+        {
+            return MemoryMarshal.Cast<byte, T>(_data.Span.Slice((int)address))[0];
+        }
+
+        /// <summary>
+        /// Checks if a given address lies inside the cached shader data.
+        /// </summary>
+        /// <param name="address">Byte offset to be checked</param>
+        /// <returns>True if the address is below the data length, false otherwise</returns>
+        public bool MemoryMapped(ulong address)
+        {
+            return address < (ulong)_data.Length;
+        }
+
+        /// <summary>
+        /// Queries Local Size X for compute shaders.
+        /// </summary>
+        /// <returns>Local Size X</returns>
+        public int QueryComputeLocalSizeX()
+        {
+            return _header.ComputeLocalSizeX;
+        }
+
+        /// <summary>
+        /// Queries Local Size Y for compute shaders.
+        /// </summary>
+        /// <returns>Local Size Y</returns>
+        public int QueryComputeLocalSizeY()
+        {
+            return _header.ComputeLocalSizeY;
+        }
+
+        /// <summary>
+        /// Queries Local Size Z for compute shaders.
+        /// </summary>
+        /// <returns>Local Size Z</returns>
+        public int QueryComputeLocalSizeZ()
+        {
+            return _header.ComputeLocalSizeZ;
+        }
+
+        /// <summary>
+        /// Queries Local Memory size in bytes for compute shaders.
+        /// </summary>
+        /// <returns>Local Memory size in bytes</returns>
+        public int QueryComputeLocalMemorySize()
+        {
+            return _header.ComputeLocalMemorySize;
+        }
+
+        /// <summary>
+        /// Queries Shared Memory size in bytes for compute shaders.
+        /// </summary>
+        /// <returns>Shared Memory size in bytes</returns>
+        public int QueryComputeSharedMemorySize()
+        {
+            return _header.ComputeSharedMemorySize;
+        }
+
+        /// <summary>
+        /// Queries the cached primitive topology for geometry shaders.
+        /// </summary>
+        /// <returns>Primitive topology stored in the cached header</returns>
+        public InputTopology QueryPrimitiveTopology() => _header.PrimitiveTopology;
+
+        /// <summary>
+        /// Queries host storage buffer alignment required.
+        /// </summary>
+        /// <returns>Host storage buffer alignment in bytes</returns>
+        public int QueryStorageBufferOffsetAlignment() => _context.Capabilities.StorageBufferOffsetAlignment;
+
+        /// <summary>
+        /// Queries host support for readable images without an explicit format declaration on the shader.
+        /// </summary>
+        /// <returns>True if formatted image load is supported, false otherwise</returns>
+        public bool QuerySupportsImageLoadFormatted() => _context.Capabilities.SupportsImageLoadFormatted;
+
+        /// <summary>
+        /// Queries host GPU non-constant texture offset support.
+        /// </summary>
+        /// <returns>True if the GPU and driver supports non-constant texture offsets, false otherwise</returns>
+        public bool QuerySupportsNonConstantTextureOffset() => _context.Capabilities.SupportsNonConstantTextureOffset;
+
+        /// <summary>
+        /// Gets the cached texture descriptor for a handle; throws ArgumentException if none was cached for it.
+        /// </summary>
+        /// <param name="handle">Index of the texture (this is the word offset of the handle in the constant buffer)</param>
+        /// <returns>Texture descriptor</returns>
+        public override Image.TextureDescriptor GetTextureDescriptor(int handle)
+        {
+            if (!_textureDescriptors.TryGetValue(handle, out Image.TextureDescriptor textureDescriptor))
+            {
+                throw new ArgumentException($"No cached texture descriptor found for handle 0x{handle:X}.", nameof(handle));
+            }
+
+            return textureDescriptor;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
index 0eaa534b..b3f1b3a8 100644
--- a/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
@@ -1,6 +1,5 @@
 using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.GAL;
-using Ryujinx.Graphics.Gpu.Image;
 using Ryujinx.Graphics.Gpu.State;
 using Ryujinx.Graphics.Shader;
 
@@ -9,7 +8,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
     /// <summary>
     /// Represents a GPU state and memory accessor.
     /// </summary>
-    class GpuAccessor : IGpuAccessor
+    class GpuAccessor : TextureDescriptorCapableGpuAccessor, IGpuAccessor
     {
         private readonly GpuContext _context;
         private readonly GpuState _state;
@@ -78,7 +77,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <typeparam name="T">Type of the data to be read</typeparam>
         /// <param name="address">GPU virtual address of the data</param>
         /// <returns>Data at the memory location</returns>
-        public T MemoryRead<T>(ulong address) where T : unmanaged
+        public override T MemoryRead<T>(ulong address)
         {
             return _context.MemoryManager.Read<T>(address);
         }
@@ -134,33 +133,6 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 : _context.Methods.BufferManager.GetGraphicsUniformBufferUseMask(_stageIndex);
         }
 
-        /// <summary>
-        /// Queries texture target information.
-        /// </summary>
-        /// <param name="handle">Texture handle</param>
-        /// <returns>True if the texture is a buffer texture, false otherwise</returns>
-        public bool QueryIsTextureBuffer(int handle)
-        {
-            return GetTextureDescriptor(handle).UnpackTextureTarget() == TextureTarget.TextureBuffer;
-        }
-
-        /// <summary>
-        /// Queries texture target information.
-        /// </summary>
-        /// <param name="handle">Texture handle</param>
-        /// <returns>True if the texture is a rectangle texture, false otherwise</returns>
-        public bool QueryIsTextureRectangle(int handle)
-        {
-            var descriptor = GetTextureDescriptor(handle);
-
-            TextureTarget target = descriptor.UnpackTextureTarget();
-
-            bool is2DTexture = target == TextureTarget.Texture2D ||
-                               target == TextureTarget.Texture2DRect;
-
-            return !descriptor.UnpackTextureCoordNormalized() && is2DTexture;
-        }
-
         /// <summary>
         /// Queries current primitive topology for geometry shaders.
         /// </summary>
@@ -208,76 +180,12 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <returns>True if the GPU and driver supports non-constant texture offsets, false otherwise</returns>
         public bool QuerySupportsNonConstantTextureOffset() => _context.Capabilities.SupportsNonConstantTextureOffset;
 
-        /// <summary>
-        /// Queries texture format information, for shaders using image load or store.
-        /// </summary>
-        /// <remarks>
-        /// This only returns non-compressed color formats.
-        /// If the format of the texture is a compressed, depth or unsupported format, then a default value is returned.
-        /// </remarks>
-        /// <param name="handle">Texture handle</param>
-        /// <returns>Color format of the non-compressed texture</returns>
-        public TextureFormat QueryTextureFormat(int handle)
-        {
-            var descriptor = GetTextureDescriptor(handle);
-
-            if (!FormatTable.TryGetTextureFormat(descriptor.UnpackFormat(), descriptor.UnpackSrgb(), out FormatInfo formatInfo))
-            {
-                return TextureFormat.Unknown;
-            }
-
-            return formatInfo.Format switch
-            {
-                Format.R8Unorm           => TextureFormat.R8Unorm,
-                Format.R8Snorm           => TextureFormat.R8Snorm,
-                Format.R8Uint            => TextureFormat.R8Uint,
-                Format.R8Sint            => TextureFormat.R8Sint,
-                Format.R16Float          => TextureFormat.R16Float,
-                Format.R16Unorm          => TextureFormat.R16Unorm,
-                Format.R16Snorm          => TextureFormat.R16Snorm,
-                Format.R16Uint           => TextureFormat.R16Uint,
-                Format.R16Sint           => TextureFormat.R16Sint,
-                Format.R32Float          => TextureFormat.R32Float,
-                Format.R32Uint           => TextureFormat.R32Uint,
-                Format.R32Sint           => TextureFormat.R32Sint,
-                Format.R8G8Unorm         => TextureFormat.R8G8Unorm,
-                Format.R8G8Snorm         => TextureFormat.R8G8Snorm,
-                Format.R8G8Uint          => TextureFormat.R8G8Uint,
-                Format.R8G8Sint          => TextureFormat.R8G8Sint,
-                Format.R16G16Float       => TextureFormat.R16G16Float,
-                Format.R16G16Unorm       => TextureFormat.R16G16Unorm,
-                Format.R16G16Snorm       => TextureFormat.R16G16Snorm,
-                Format.R16G16Uint        => TextureFormat.R16G16Uint,
-                Format.R16G16Sint        => TextureFormat.R16G16Sint,
-                Format.R32G32Float       => TextureFormat.R32G32Float,
-                Format.R32G32Uint        => TextureFormat.R32G32Uint,
-                Format.R32G32Sint        => TextureFormat.R32G32Sint,
-                Format.R8G8B8A8Unorm     => TextureFormat.R8G8B8A8Unorm,
-                Format.R8G8B8A8Snorm     => TextureFormat.R8G8B8A8Snorm,
-                Format.R8G8B8A8Uint      => TextureFormat.R8G8B8A8Uint,
-                Format.R8G8B8A8Sint      => TextureFormat.R8G8B8A8Sint,
-                Format.R8G8B8A8Srgb      => TextureFormat.R8G8B8A8Unorm,
-                Format.R16G16B16A16Float => TextureFormat.R16G16B16A16Float,
-                Format.R16G16B16A16Unorm => TextureFormat.R16G16B16A16Unorm,
-                Format.R16G16B16A16Snorm => TextureFormat.R16G16B16A16Snorm,
-                Format.R16G16B16A16Uint  => TextureFormat.R16G16B16A16Uint,
-                Format.R16G16B16A16Sint  => TextureFormat.R16G16B16A16Sint,
-                Format.R32G32B32A32Float => TextureFormat.R32G32B32A32Float,
-                Format.R32G32B32A32Uint  => TextureFormat.R32G32B32A32Uint,
-                Format.R32G32B32A32Sint  => TextureFormat.R32G32B32A32Sint,
-                Format.R10G10B10A2Unorm  => TextureFormat.R10G10B10A2Unorm,
-                Format.R10G10B10A2Uint   => TextureFormat.R10G10B10A2Uint,
-                Format.R11G11B10Float    => TextureFormat.R11G11B10Float,
-                _                        => TextureFormat.Unknown
-            };
-        }
-
         /// <summary>
         /// Gets the texture descriptor for a given texture on the pool.
         /// </summary>
-        /// <param name="handle">Index of the texture (this is the shader "fake" handle)</param>
+        /// <param name="handle">Index of the texture (this is the word offset of the handle in the constant buffer)</param>
         /// <returns>Texture descriptor</returns>
-        private Image.TextureDescriptor GetTextureDescriptor(int handle)
+        public override Image.TextureDescriptor GetTextureDescriptor(int handle)
         {
             if (_compute)
             {
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderBundle.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderBundle.cs
index de06e5e0..efdbc3eb 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderBundle.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderBundle.cs
@@ -39,7 +39,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
             foreach (ShaderCodeHolder holder in Shaders)
             {
-                holder?.HostShader.Dispose();
+                holder?.HostShader?.Dispose();
             }
         }
     }
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
index 131aa6b7..ac5aedbe 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -1,9 +1,17 @@
+using Ryujinx.Common;
+using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Shader.Cache;
+using Ryujinx.Graphics.Gpu.Shader.Cache.Definition;
 using Ryujinx.Graphics.Gpu.State;
 using Ryujinx.Graphics.Shader;
 using Ryujinx.Graphics.Shader.Translation;
 using System;
 using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 
 namespace Ryujinx.Graphics.Gpu.Shader
 {
@@ -21,6 +29,16 @@ namespace Ryujinx.Graphics.Gpu.Shader
         private readonly Dictionary<ulong, List<ShaderBundle>> _cpPrograms;
         private readonly Dictionary<ShaderAddresses, List<ShaderBundle>> _gpPrograms;
 
+        private CacheManager _cacheManager;
+
+        private Dictionary<Hash128, ShaderBundle> _gpProgramsDiskCache;
+        private Dictionary<Hash128, ShaderBundle> _cpProgramsDiskCache;
+
+        /// <summary>
+        /// Version of the codegen (to be incremented when codegen changes).
+        /// </summary>
+        private const ulong ShaderCodeGenVersion = 1;
+
         /// <summary>
         /// Creates a new instance of the shader cache.
         /// </summary>
@@ -33,6 +51,251 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
             _cpPrograms = new Dictionary<ulong, List<ShaderBundle>>();
             _gpPrograms = new Dictionary<ShaderAddresses, List<ShaderBundle>>();
+            _gpProgramsDiskCache = new Dictionary<Hash128, ShaderBundle>();
+            _cpProgramsDiskCache = new Dictionary<Hash128, ShaderBundle>();
+        }
+
+        /// <summary>
+        /// Loads the shader disk cache (when enabled and a title id is set) and rebuilds every program listed in it.
+        /// </summary>
+        internal void Initialize()
+        {
+            if (GraphicsConfig.EnableShaderCache && GraphicsConfig.TitleId != null)
+            {
+                _cacheManager = new CacheManager(CacheGraphicsApi.OpenGL, CacheHashType.XxHash128, "glsl", GraphicsConfig.TitleId, ShaderCodeGenVersion);
+
+                HashSet<Hash128> invalidEntries = new HashSet<Hash128>();
+
+                ReadOnlySpan<Hash128> guestProgramList = _cacheManager.GetGuestProgramList();
+
+                for (int programIndex = 0; programIndex < guestProgramList.Length; programIndex++)
+                {
+                    Hash128 key = guestProgramList[programIndex];
+
+                    Logger.Info?.Print(LogClass.Gpu, $"Compiling shader {key} ({programIndex + 1} / {guestProgramList.Length})");
+
+                    byte[] hostProgramBinary = _cacheManager.GetHostProgramByHash(ref key);
+                    bool hasHostCache = hostProgramBinary != null;
+
+                    IProgram hostProgram = null;
+
+                    // The guest program is always required: it is what the host program is rebuilt from when needed.
+                    byte[] guestProgram = _cacheManager.GetGuestProgramByHash(ref key);
+
+                    if (guestProgram == null)
+                    {
+                        Logger.Error?.Print(LogClass.Gpu, $"Ignoring orphan shader hash {key} in cache (is the cache incomplete?)");
+
+                        // Should not happen, but if someone messed with the cache it's better to catch it.
+                        invalidEntries.Add(key);
+
+                        continue;
+                    }
+
+                    ReadOnlySpan<byte> guestProgramReadOnlySpan = guestProgram;
+
+                    ReadOnlySpan<GuestShaderCacheEntry> cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out GuestShaderCacheHeader fileHeader);
+
+                    if (cachedShaderEntries[0].Header.Stage == ShaderStage.Compute)
+                    {
+                        Debug.Assert(cachedShaderEntries.Length == 1);
+
+                        GuestShaderCacheEntry entry = cachedShaderEntries[0];
+
+                        HostShaderCacheEntry[] hostShaderEntries = null;
+
+                        // Try loading host shader binary.
+                        if (hasHostCache)
+                        {
+                            hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan<byte> hostProgramBinarySpan);
+                            hostProgramBinary = hostProgramBinarySpan.ToArray();
+                            hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary);
+                        }
+
+                        bool isHostProgramValid = hostProgram != null;
+
+                        ShaderProgram program;
+                        ShaderProgramInfo shaderProgramInfo;
+
+                        // Reconstruct code holder.
+                        if (isHostProgramValid)
+                        {
+                            program = new ShaderProgram(entry.Header.Stage, "", entry.Header.Size, entry.Header.SizeA);
+                            shaderProgramInfo = hostShaderEntries[0].ToShaderProgramInfo();
+                        }
+                        else
+                        {
+                            IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
+
+                            program = Translator.CreateContext(0, gpuAccessor, DefaultFlags | TranslationFlags.Compute).Translate(out shaderProgramInfo);
+                        }
+
+                        ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code);
+
+                        // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again.
+                        if (hostProgram == null)
+                        {
+                            Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest...");
+
+                            // Compile shader and create program as the shader program binary got invalidated.
+                            shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code);
+                            hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null);
+
+                            // As the host program was invalidated, save the new entry in the cache.
+                            hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader });
+
+                            if (hasHostCache)
+                            {
+                                _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary);
+                            }
+                            else
+                            {
+                                Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)");
+
+                                _cacheManager.AddHostProgram(ref key, hostProgramBinary);
+                            }
+                        }
+
+                        _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader));
+                    }
+                    else
+                    {
+                        Debug.Assert(cachedShaderEntries.Length == Constants.ShaderStages);
+
+                        ShaderCodeHolder[] shaders = new ShaderCodeHolder[cachedShaderEntries.Length];
+                        List<ShaderProgram> shaderPrograms = new List<ShaderProgram>();
+
+                        TransformFeedbackDescriptor[] tfd = ReadTransformationFeedbackInformations(ref guestProgramReadOnlySpan, fileHeader);
+
+                        TranslationFlags flags = DefaultFlags;
+
+                        if (tfd != null)
+                        {
+                            flags |= TranslationFlags.Feedback; // Add the feedback bit without discarding DefaultFlags.
+                        }
+
+                        TranslationCounts counts = new TranslationCounts();
+
+                        HostShaderCacheEntry[] hostShaderEntries = null;
+
+                        // Try loading host shader binary.
+                        if (hasHostCache)
+                        {
+                            hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan<byte> hostProgramBinarySpan);
+                            hostProgramBinary = hostProgramBinarySpan.ToArray();
+                            hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary);
+                        }
+
+                        bool isHostProgramValid = hostProgram != null;
+
+                        // Reconstruct code holder.
+                        for (int i = 0; i < cachedShaderEntries.Length; i++)
+                        {
+                            GuestShaderCacheEntry entry = cachedShaderEntries[i];
+
+                            if (entry == null)
+                            {
+                                continue;
+                            }
+
+                            ShaderProgram program;
+
+                            if (entry.Header.SizeA != 0)
+                            {
+                                ShaderProgramInfo shaderProgramInfo;
+
+                                if (isHostProgramValid)
+                                {
+                                    program = new ShaderProgram(entry.Header.Stage, "", entry.Header.Size, entry.Header.SizeA);
+                                    shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo();
+                                }
+                                else
+                                {
+                                    IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
+
+                                    program = Translator.CreateContext((ulong)entry.Header.Size, 0, gpuAccessor, flags, counts).Translate(out shaderProgramInfo);
+                                }
+
+                                // NOTE: Vertex B comes first in the shader cache.
+                                byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size).ToArray();
+                                byte[] code2 = entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray();
+
+                                shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2);
+                            }
+                            else
+                            {
+                                ShaderProgramInfo shaderProgramInfo;
+
+                                if (isHostProgramValid)
+                                {
+                                    program = new ShaderProgram(entry.Header.Stage, "", entry.Header.Size, entry.Header.SizeA);
+                                    shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo();
+                                }
+                                else
+                                {
+                                    IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
+
+                                    program = Translator.CreateContext(0, gpuAccessor, flags, counts).Translate(out shaderProgramInfo);
+                                }
+
+                                shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code);
+                            }
+
+                            shaderPrograms.Add(program);
+                        }
+
+                        // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again.
+                        if (!isHostProgramValid)
+                        {
+                            Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest...");
+
+                            List<IShader> hostShaders = new List<IShader>();
+
+                            // Compile shaders and create program as the shader program binary got invalidated.
+                            for (int stage = 0; stage < Constants.ShaderStages; stage++)
+                            {
+                                ShaderProgram program = shaders[stage]?.Program;
+
+                                if (program == null)
+                                {
+                                    continue;
+                                }
+
+                                IShader hostShader = _context.Renderer.CompileShader(program.Stage, program.Code);
+
+                                shaders[stage].HostShader = hostShader;
+
+                                hostShaders.Add(hostShader);
+                            }
+
+                            hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), tfd);
+
+                            // As the host program was invalidated, save the new entry in the cache.
+                            hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders);
+
+                            if (hasHostCache)
+                            {
+                                _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary);
+                            }
+                            else
+                            {
+                                Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)");
+
+                                _cacheManager.AddHostProgram(ref key, hostProgramBinary);
+                            }
+                        }
+
+                        _gpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shaders));
+                    }
+                }
+
+                // Drop the orphan hashes collected above from the manifest, then persist the cache.
+                _cacheManager.RemoveManifestEntries(invalidEntries);
+                _cacheManager.FlushToArchive();
+                _cacheManager.Synchronize();
+
+                Logger.Info?.Print(LogClass.Gpu, "Shader cache loaded.");
+            }
         }
 
         /// <summary>
@@ -71,7 +334,9 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 }
             }
 
-            ShaderCodeHolder shader = TranslateComputeShader(
+            TranslatorContext[] shaderContexts = new TranslatorContext[1];
+
+            shaderContexts[0] = DecodeComputeShader(
                 state,
                 gpuVa,
                 localSizeX,
@@ -80,11 +345,45 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 localMemorySize,
                 sharedMemorySize);
 
-            shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code);
+            bool isShaderCacheEnabled = _cacheManager != null;
 
-            IProgram hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null);
+            byte[] programCode = null;
+            Hash128 programCodeHash = default;
+            GuestShaderCacheEntryHeader[] shaderCacheEntries = null;
 
-            ShaderBundle cpShader = new ShaderBundle(hostProgram, shader);
+            if (isShaderCacheEnabled)
+            {
+                // Compute hash and prepare data for shader disk cache comparison.
+                GetProgramInformations(null, shaderContexts, out programCode, out programCodeHash, out shaderCacheEntries);
+            }
+
+            ShaderBundle cpShader;
+
+            // Search for the program hash in loaded shaders.
+            if (!isShaderCacheEnabled || !_cpProgramsDiskCache.TryGetValue(programCodeHash, out cpShader))
+            {
+                if (isShaderCacheEnabled)
+                {
+                    Logger.Debug?.Print(LogClass.Gpu, $"Shader {programCodeHash} not in cache, compiling!");
+                }
+
+                // The shader isn't currently cached, translate it and compile it.
+                ShaderCodeHolder shader = TranslateShader(shaderContexts[0]);
+
+                shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code);
+
+                IProgram hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null);
+
+                byte[] hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader });
+
+                cpShader = new ShaderBundle(hostProgram, shader);
+
+                if (isShaderCacheEnabled)
+                {
+                    _cpProgramsDiskCache.Add(programCodeHash, cpShader);
+                    _cacheManager.SaveProgram(ref programCodeHash, CreateGuestProgramDump(programCode, shaderCacheEntries, null), hostProgramBinary);
+                }
+            }
 
             if (!isCached)
             {
@@ -123,9 +422,9 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 }
             }
 
-            ShaderCodeHolder[] shaders = new ShaderCodeHolder[Constants.ShaderStages];
+            TranslatorContext[] shaderContexts = new TranslatorContext[Constants.ShaderStages];
 
-            var tfd = GetTransformFeedbackDescriptors(state);
+            TransformFeedbackDescriptor[] tfd = GetTransformFeedbackDescriptors(state);
 
             TranslationFlags flags = DefaultFlags;
 
@@ -138,40 +437,80 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
             if (addresses.VertexA != 0)
             {
-                shaders[0] = TranslateGraphicsShader(state, counts, flags, ShaderStage.Vertex, addresses.Vertex, addresses.VertexA);
+                shaderContexts[0] = DecodeGraphicsShader(state, counts, flags, ShaderStage.Vertex, addresses.Vertex, addresses.VertexA);
             }
             else
             {
-                shaders[0] = TranslateGraphicsShader(state, counts, flags, ShaderStage.Vertex, addresses.Vertex);
+                shaderContexts[0] = DecodeGraphicsShader(state, counts, flags, ShaderStage.Vertex, addresses.Vertex);
             }
 
-            shaders[1] = TranslateGraphicsShader(state, counts, flags, ShaderStage.TessellationControl,    addresses.TessControl);
-            shaders[2] = TranslateGraphicsShader(state, counts, flags, ShaderStage.TessellationEvaluation, addresses.TessEvaluation);
-            shaders[3] = TranslateGraphicsShader(state, counts, flags, ShaderStage.Geometry,               addresses.Geometry);
-            shaders[4] = TranslateGraphicsShader(state, counts, flags, ShaderStage.Fragment,               addresses.Fragment);
+            shaderContexts[1] = DecodeGraphicsShader(state, counts, flags, ShaderStage.TessellationControl, addresses.TessControl);
+            shaderContexts[2] = DecodeGraphicsShader(state, counts, flags, ShaderStage.TessellationEvaluation, addresses.TessEvaluation);
+            shaderContexts[3] = DecodeGraphicsShader(state, counts, flags, ShaderStage.Geometry, addresses.Geometry);
+            shaderContexts[4] = DecodeGraphicsShader(state, counts, flags, ShaderStage.Fragment, addresses.Fragment);
 
-            List<IShader> hostShaders = new List<IShader>();
+            bool isShaderCacheEnabled = _cacheManager != null;
 
-            for (int stage = 0; stage < Constants.ShaderStages; stage++)
+            byte[] programCode = null;
+            Hash128 programCodeHash = default;
+            GuestShaderCacheEntryHeader[] shaderCacheEntries = null;
+
+            if (isShaderCacheEnabled)
             {
-                ShaderProgram program = shaders[stage]?.Program;
+                // Compute hash and prepare data for shader disk cache comparison.
+                GetProgramInformations(tfd, shaderContexts, out programCode, out programCodeHash, out shaderCacheEntries);
+            }
 
-                if (program == null)
+            ShaderBundle gpShaders;
+
+            // Search for the program hash in loaded shaders.
+            if (!isShaderCacheEnabled || !_gpProgramsDiskCache.TryGetValue(programCodeHash, out gpShaders))
+            {
+                if (isShaderCacheEnabled)
                 {
-                    continue;
+                    Logger.Debug?.Print(LogClass.Gpu, $"Shader {programCodeHash} not in cache, compiling!");
                 }
 
-                IShader hostShader = _context.Renderer.CompileShader(program.Stage, program.Code);
+                // The shader isn't currently cached, translate it and compile it.
+                ShaderCodeHolder[] shaders = new ShaderCodeHolder[Constants.ShaderStages];
 
-                shaders[stage].HostShader = hostShader;
+                shaders[0] = TranslateShader(shaderContexts[0]);
+                shaders[1] = TranslateShader(shaderContexts[1]);
+                shaders[2] = TranslateShader(shaderContexts[2]);
+                shaders[3] = TranslateShader(shaderContexts[3]);
+                shaders[4] = TranslateShader(shaderContexts[4]);
 
-                hostShaders.Add(hostShader);
+                List<IShader> hostShaders = new List<IShader>();
+
+                for (int stage = 0; stage < Constants.ShaderStages; stage++)
+                {
+                    ShaderProgram program = shaders[stage]?.Program;
+
+                    if (program == null)
+                    {
+                        continue;
+                    }
+
+                    IShader hostShader = _context.Renderer.CompileShader(program.Stage, program.Code);
+
+                    shaders[stage].HostShader = hostShader;
+
+                    hostShaders.Add(hostShader);
+                }
+
+                IProgram hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), tfd);
+
+                byte[] hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders);
+
+                gpShaders = new ShaderBundle(hostProgram, shaders);
+
+                if (isShaderCacheEnabled)
+                {
+                    _gpProgramsDiskCache.Add(programCodeHash, gpShaders);
+                    _cacheManager.SaveProgram(ref programCodeHash, CreateGuestProgramDump(programCode, shaderCacheEntries, tfd), hostProgramBinary);
+                }
             }
 
-            IProgram hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), tfd);
-
-            ShaderBundle gpShaders = new ShaderBundle(hostProgram, shaders);
-
             if (!isCached)
             {
                 list = new List<ShaderBundle>();
@@ -286,7 +625,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
         }
 
         /// <summary>
-        /// Translates the binary Maxwell shader code to something that the host API accepts.
+        /// Decode the binary Maxwell shader code to a translator context.
         /// </summary>
         /// <param name="state">Current GPU state</param>
         /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
@@ -295,8 +634,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <param name="localSizeZ">Local group size Z of the computer shader</param>
         /// <param name="localMemorySize">Local memory size of the compute shader</param>
         /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
-        /// <returns>Compiled compute shader code</returns>
-        private ShaderCodeHolder TranslateComputeShader(
+        /// <returns>The generated translator context</returns>
+        private TranslatorContext DecodeComputeShader(
             GpuState state,
             ulong gpuVa,
             int localSizeX,
@@ -312,25 +651,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
             GpuAccessor gpuAccessor = new GpuAccessor(_context, state, localSizeX, localSizeY, localSizeZ, localMemorySize, sharedMemorySize);
 
-            ShaderProgram program;
-
-            program = Translator.Translate(gpuVa, gpuAccessor, DefaultFlags | TranslationFlags.Compute);
-
-            byte[] code = _context.MemoryManager.GetSpan(gpuVa, program.Size).ToArray();
-
-            _dumper.Dump(code, compute: true, out string fullPath, out string codePath);
-
-            if (fullPath != null && codePath != null)
-            {
-                program.Prepend("// " + codePath);
-                program.Prepend("// " + fullPath);
-            }
-
-            return new ShaderCodeHolder(program, code);
+            return Translator.CreateContext(gpuVa, gpuAccessor, DefaultFlags | TranslationFlags.Compute);
         }
 
         /// <summary>
-        /// Translates the binary Maxwell shader code to something that the host API accepts.
+        /// Decode the binary Maxwell shader code to a translator context.
         /// </summary>
         /// <remarks>
         /// This will combine the "Vertex A" and "Vertex B" shader stages, if specified, into one shader.
@@ -341,8 +666,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// <param name="stage">Shader stage</param>
         /// <param name="gpuVa">GPU virtual address of the shader code</param>
         /// <param name="gpuVaA">Optional GPU virtual address of the "Vertex A" shader code</param>
-        /// <returns>Compiled graphics shader code</returns>
-        private ShaderCodeHolder TranslateGraphicsShader(
+        /// <returns>The generated translator context</returns>
+        private TranslatorContext DecodeGraphicsShader(
             GpuState state,
             TranslationCounts counts,
             TranslationFlags flags,
@@ -359,14 +684,36 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
             if (gpuVaA != 0)
             {
-                ShaderProgram program = Translator.Translate(gpuVaA, gpuVa, gpuAccessor, flags, counts);
+                return Translator.CreateContext(gpuVaA, gpuVa, gpuAccessor, flags, counts);
+            }
+            else
+            {
+                return Translator.CreateContext(gpuVa, gpuAccessor, flags, counts);
+            }
+        }
 
-                byte[] codeA = _context.MemoryManager.GetSpan(gpuVaA, program.SizeA).ToArray();
-                byte[] codeB = _context.MemoryManager.GetSpan(gpuVa,  program.Size).ToArray();
+        /// <summary>
+        /// Translates a previously generated translator context to something that the host API accepts.
+        /// </summary>
+        /// <param name="translatorContext">Current translator context to translate</param>
+        /// <returns>Compiled shader code, or null if <paramref name="translatorContext"/> is null</returns>
+        private ShaderCodeHolder TranslateShader(TranslatorContext translatorContext)
+        {
+            if (translatorContext == null)
+            {
+                return null;
+            }
+
+            if (translatorContext.AddressA != 0)
+            {
+                byte[] codeA = _context.MemoryManager.GetSpan(translatorContext.AddressA, translatorContext.SizeA).ToArray();
+                byte[] codeB = _context.MemoryManager.GetSpan(translatorContext.Address, translatorContext.Size).ToArray();
 
                 _dumper.Dump(codeA, compute: false, out string fullPathA, out string codePathA);
                 _dumper.Dump(codeB, compute: false, out string fullPathB, out string codePathB);
 
+                ShaderProgram program = translatorContext.Translate(out ShaderProgramInfo shaderProgramInfo);
+
                 if (fullPathA != null && fullPathB != null && codePathA != null && codePathB != null)
                 {
                     program.Prepend("// " + codePathB);
@@ -375,23 +722,23 @@ namespace Ryujinx.Graphics.Gpu.Shader
                     program.Prepend("// " + fullPathA);
                 }
 
-                return new ShaderCodeHolder(program, codeB, codeA);
+                return new ShaderCodeHolder(program, shaderProgramInfo, codeB, codeA);
             }
             else
             {
-                ShaderProgram program = Translator.Translate(gpuVa, gpuAccessor, flags, counts);
-
-                byte[] code = _context.MemoryManager.GetSpan(gpuVa, program.Size).ToArray();
+                byte[] code = _context.MemoryManager.GetSpan(translatorContext.Address, translatorContext.Size).ToArray();
 
                 _dumper.Dump(code, compute: false, out string fullPath, out string codePath);
 
+                ShaderProgram program = translatorContext.Translate(out ShaderProgramInfo shaderProgramInfo);
+
                 if (fullPath != null && codePath != null)
                 {
                     program.Prepend("// " + codePath);
                     program.Prepend("// " + fullPath);
                 }
 
-                return new ShaderCodeHolder(program, code);
+                return new ShaderCodeHolder(program, shaderProgramInfo, code);
             }
         }
 
@@ -416,6 +763,194 @@ namespace Ryujinx.Graphics.Gpu.Shader
                     bundle.Dispose();
                 }
             }
+
+            _cacheManager?.Dispose();
+        }
+
+        /// <summary>
+        /// Serializes a guest shader program: a header, every entry header, then the raw program code.
+        /// </summary>
+        /// <param name="programCode">The raw program data written after the entry headers</param>
+        /// <param name="shaderCacheEntries">The guest shader entry headers to write, in order</param>
+        /// <param name="tfd">The transform feedback descriptors in use (may be null); only their count is stored here</param>
+        /// <returns>The serialized guest shader program</returns>
+        private static byte[] CreateGuestProgramDump(ReadOnlySpan<byte> programCode, GuestShaderCacheEntryHeader[] shaderCacheEntries, TransformFeedbackDescriptor[] tfd)
+        {
+            using (MemoryStream resultStream = new MemoryStream())
+            {
+                BinaryWriter resultStreamWriter = new BinaryWriter(resultStream);
+
+                byte transformFeedbackCount = 0;
+
+                if (tfd != null)
+                {
+                    transformFeedbackCount = (byte)tfd.Length;
+                }
+
+                // Header: entry count and transform feedback count, both narrowed to a byte.
+                resultStreamWriter.WriteStruct(new GuestShaderCacheHeader((byte)shaderCacheEntries.Length, transformFeedbackCount));
+
+                // Write every entry header.
+                foreach (GuestShaderCacheEntryHeader entry in shaderCacheEntries)
+                {
+                    resultStreamWriter.WriteStruct(entry);
+                }
+
+                // Finally, write the program code (callers are expected to have appended the transform feedback information to it).
+                resultStreamWriter.Write(programCode);
+
+                return resultStream.ToArray();
+            }
+        }
+
+        /// <summary>
+        /// Writes each transform feedback descriptor to the given stream as a header followed by its varying locations.
+        /// </summary>
+        /// <param name="stream">The stream to write data to</param>
+        /// <param name="tfd">The transform feedback descriptors to write; nothing is written when null</param>
+        private static void WriteTransformationFeedbackInformation(Stream stream, TransformFeedbackDescriptor[] tfd)
+        {
+            if (tfd != null)
+            {
+                BinaryWriter writer = new BinaryWriter(stream); // Left undisposed; disposing a BinaryWriter would also close the caller's stream.
+
+                foreach (TransformFeedbackDescriptor transform in tfd)
+                {
+                    writer.WriteStruct(new GuestShaderCacheTransformFeedbackHeader(transform.BufferIndex, transform.Stride, transform.VaryingLocations.Length));
+                    writer.Write(transform.VaryingLocations);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Reads the transform feedback descriptors (header plus varying locations each) from raw guest data.
+        /// </summary>
+        /// <param name="data">The raw guest transform feedback descriptors; advanced past every descriptor that is read</param>
+        /// <param name="header">The guest shader program header, providing the number of descriptors to read</param>
+        /// <returns>The transform feedback descriptors read from guest, or null if the header reports none</returns>
+        private static TransformFeedbackDescriptor[] ReadTransformationFeedbackInformations(ref ReadOnlySpan<byte> data, GuestShaderCacheHeader header)
+        {
+            if (header.TransformFeedbackCount != 0)
+            {
+                TransformFeedbackDescriptor[] result = new TransformFeedbackDescriptor[header.TransformFeedbackCount];
+
+                for (int i = 0; i < result.Length; i++)
+                {
+                    GuestShaderCacheTransformFeedbackHeader feedbackHeader = MemoryMarshal.Read<GuestShaderCacheTransformFeedbackHeader>(data);
+
+                    result[i] = new TransformFeedbackDescriptor(feedbackHeader.BufferIndex, feedbackHeader.Stride, data.Slice(Unsafe.SizeOf<GuestShaderCacheTransformFeedbackHeader>(), feedbackHeader.VaryingLocationsLength).ToArray());
+
+                    data = data.Slice(Unsafe.SizeOf<GuestShaderCacheTransformFeedbackHeader>() + feedbackHeader.VaryingLocationsLength); // Skip the header and its varying locations.
+                }
+
+                return result;
+            }
+
+            return null;
+        }
+
+        /// <summary>
+        /// Creates a new instance of <see cref="GuestGpuAccessorHeader"/> from the values queried on a GPU accessor.
+        /// </summary>
+        /// <param name="gpuAccessor">The GPU accessor to query the compute sizes and primitive topology from</param>
+        /// <returns>A new instance of <see cref="GuestGpuAccessorHeader"/></returns>
+        private static GuestGpuAccessorHeader CreateGuestGpuAccessorCache(IGpuAccessor gpuAccessor)
+        {
+            return new GuestGpuAccessorHeader
+            {
+                ComputeLocalSizeX = gpuAccessor.QueryComputeLocalSizeX(),
+                ComputeLocalSizeY = gpuAccessor.QueryComputeLocalSizeY(),
+                ComputeLocalSizeZ = gpuAccessor.QueryComputeLocalSizeZ(),
+                ComputeLocalMemorySize = gpuAccessor.QueryComputeLocalMemorySize(),
+                ComputeSharedMemorySize = gpuAccessor.QueryComputeSharedMemorySize(),
+                PrimitiveTopology = gpuAccessor.QueryPrimitiveTopology(),
+            };
+        }
+
+        /// <summary>
+        /// Builds the guest GPU accessor header and writes the texture descriptors used by the shader to the given stream.
+        /// </summary>
+        /// <param name="stream">The stream to write the guest texture descriptors to (the header itself is returned, not written)</param>
+        /// <param name="shaderContext">The shader translator context in use</param>
+        /// <returns>The guest GPU accessor header, with its texture descriptor count set when descriptors were written</returns>
+        private static GuestGpuAccessorHeader WriteGuestGpuAccessorCache(Stream stream, TranslatorContext shaderContext)
+        {
+            BinaryWriter writer = new BinaryWriter(stream);
+
+            GuestGpuAccessorHeader header = CreateGuestGpuAccessorCache(shaderContext.GpuAccessor);
+
+            // If we have a full GPU accessor, cache the texture descriptors.
+            if (shaderContext.GpuAccessor is GpuAccessor gpuAccessor)
+            {
+                HashSet<int> textureHandlesInUse = shaderContext.TextureHandlesForCache;
+
+                header.TextureDescriptorCount = textureHandlesInUse.Count;
+
+                foreach (int textureHandle in textureHandlesInUse)
+                {
+                    GuestTextureDescriptor textureDescriptor = gpuAccessor.GetTextureDescriptor(textureHandle).ToCache();
+
+                    textureDescriptor.Handle = (uint)textureHandle; // Store the handle alongside the descriptor data.
+
+                    writer.WriteStruct(textureDescriptor);
+                }
+            }
+
+            return header;
+        }
+
+        /// <summary>
+        /// Builds the raw program data, its hash and the per-stage entry headers used by the shader cache.
+        /// </summary>
+        /// <param name="tfd">The current transform feedback descriptors used, written after all stages (may be null)</param>
+        /// <param name="shaderContexts">The shader translator contexts in use; a null entry yields a default entry header</param>
+        /// <param name="programCode">The resulting raw program data (stage code and texture descriptors, then transform feedback information)</param>
+        /// <param name="programCodeHash">The hash of <paramref name="programCode"/>, as computed by the cache manager</param>
+        /// <param name="entries">The resulting guest shader entry headers, one per element of <paramref name="shaderContexts"/></param>
+        private void GetProgramInformations(TransformFeedbackDescriptor[] tfd, ReadOnlySpan<TranslatorContext> shaderContexts, out byte[] programCode, out Hash128 programCodeHash, out GuestShaderCacheEntryHeader[] entries)
+        {
+            GuestShaderCacheEntryHeader ComputeStage(Stream stream, TranslatorContext context)
+            {
+                if (context == null)
+                {
+                    return new GuestShaderCacheEntryHeader(); // Missing stage: default header, nothing written to the stream.
+                }
+
+                ReadOnlySpan<byte> data = _context.MemoryManager.GetSpan(context.Address, context.Size);
+
+                stream.Write(data);
+
+                int size = data.Length;
+                int sizeA = 0;
+
+                if (context.AddressA != 0)
+                {
+                    data = _context.MemoryManager.GetSpan(context.AddressA, context.SizeA);
+
+                    sizeA = data.Length;
+
+                    stream.Write(data); // Code at AddressA is stored right after the main code.
+                }
+
+                GuestGpuAccessorHeader gpuAccessorHeader = WriteGuestGpuAccessorCache(stream, context);
+
+                return new GuestShaderCacheEntryHeader(context.Stage, size, sizeA, gpuAccessorHeader);
+            }
+
+            entries = new GuestShaderCacheEntryHeader[shaderContexts.Length];
+
+            using (MemoryStream stream = new MemoryStream())
+            {
+                for (int i = 0; i < shaderContexts.Length; i++)
+                {
+                    entries[i] = ComputeStage(stream, shaderContexts[i]);
+                }
+
+                WriteTransformationFeedbackInformation(stream, tfd);
+
+                programCode = stream.ToArray();
+                programCodeHash = _cacheManager.ComputeHash(programCode); // Dereferences _cacheManager without a null check.
+            }
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCodeHolder.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCodeHolder.cs
index dd90788e..dbf2d6f5 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderCodeHolder.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCodeHolder.cs
@@ -13,9 +13,15 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// </summary>
         public ShaderProgram Program { get; }
 
+        /// <summary>
+        /// Shader program information.
+        /// </summary>
+        public ShaderProgramInfo Info { get; }
+
         /// <summary>
         /// Host shader object.
         /// </summary>
+        /// <remarks>Null if the host shader program cache is in use.</remarks>
         public IShader HostShader { get; set; }
 
         /// <summary>
@@ -32,11 +38,13 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// Creates a new instace of the shader code holder.
         /// </summary>
         /// <param name="program">Shader program</param>
+        /// <param name="info">Shader program information</param>
         /// <param name="code">Maxwell binary shader code</param>
         /// <param name="code2">Optional binary shader code of the "Vertex A" shader, when combined with "Vertex B"</param>
-        public ShaderCodeHolder(ShaderProgram program, byte[] code, byte[] code2 = null)
+        public ShaderCodeHolder(ShaderProgram program, ShaderProgramInfo info, byte[] code, byte[] code2 = null)
         {
             Program = program;
+            Info    = info;
             Code    = code;
             Code2   = code2;
         }
diff --git a/Ryujinx.Graphics.Gpu/Shader/TextureDescriptorCapableGpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/TextureDescriptorCapableGpuAccessor.cs
new file mode 100644
index 00000000..7901fe59
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Shader/TextureDescriptorCapableGpuAccessor.cs
@@ -0,0 +1,104 @@
+using Ryujinx.Graphics.GAL;
+using Ryujinx.Graphics.Gpu.Image;
+using Ryujinx.Graphics.Shader;
+
+namespace Ryujinx.Graphics.Gpu.Shader
+{
+    abstract class TextureDescriptorCapableGpuAccessor : IGpuAccessor
+    {
+        public abstract T MemoryRead<T>(ulong address) where T : unmanaged;
+
+        public abstract Image.TextureDescriptor GetTextureDescriptor(int handle); // Every query below is answered from this descriptor.
+
+        /// <summary>
+        /// Queries texture format information, for shaders using image load or store.
+        /// </summary>
+        /// <remarks>
+        /// This only returns non-compressed color formats.
+        /// If the format of the texture is a compressed, depth or unsupported format, then a default value is returned.
+        /// </remarks>
+        /// <param name="handle">Texture handle</param>
+        /// <returns>Color format of the non-compressed texture, or <see cref="TextureFormat.Unknown"/> if the lookup fails or no mapping exists</returns>
+        public TextureFormat QueryTextureFormat(int handle)
+        {
+            var descriptor = GetTextureDescriptor(handle);
+
+            if (!FormatTable.TryGetTextureFormat(descriptor.UnpackFormat(), descriptor.UnpackSrgb(), out FormatInfo formatInfo))
+            {
+                return TextureFormat.Unknown;
+            }
+
+            return formatInfo.Format switch
+            {
+                Format.R8Unorm           => TextureFormat.R8Unorm,
+                Format.R8Snorm           => TextureFormat.R8Snorm,
+                Format.R8Uint            => TextureFormat.R8Uint,
+                Format.R8Sint            => TextureFormat.R8Sint,
+                Format.R16Float          => TextureFormat.R16Float,
+                Format.R16Unorm          => TextureFormat.R16Unorm,
+                Format.R16Snorm          => TextureFormat.R16Snorm,
+                Format.R16Uint           => TextureFormat.R16Uint,
+                Format.R16Sint           => TextureFormat.R16Sint,
+                Format.R32Float          => TextureFormat.R32Float,
+                Format.R32Uint           => TextureFormat.R32Uint,
+                Format.R32Sint           => TextureFormat.R32Sint,
+                Format.R8G8Unorm         => TextureFormat.R8G8Unorm,
+                Format.R8G8Snorm         => TextureFormat.R8G8Snorm,
+                Format.R8G8Uint          => TextureFormat.R8G8Uint,
+                Format.R8G8Sint          => TextureFormat.R8G8Sint,
+                Format.R16G16Float       => TextureFormat.R16G16Float,
+                Format.R16G16Unorm       => TextureFormat.R16G16Unorm,
+                Format.R16G16Snorm       => TextureFormat.R16G16Snorm,
+                Format.R16G16Uint        => TextureFormat.R16G16Uint,
+                Format.R16G16Sint        => TextureFormat.R16G16Sint,
+                Format.R32G32Float       => TextureFormat.R32G32Float,
+                Format.R32G32Uint        => TextureFormat.R32G32Uint,
+                Format.R32G32Sint        => TextureFormat.R32G32Sint,
+                Format.R8G8B8A8Unorm     => TextureFormat.R8G8B8A8Unorm,
+                Format.R8G8B8A8Snorm     => TextureFormat.R8G8B8A8Snorm,
+                Format.R8G8B8A8Uint      => TextureFormat.R8G8B8A8Uint,
+                Format.R8G8B8A8Sint      => TextureFormat.R8G8B8A8Sint,
+                Format.R8G8B8A8Srgb      => TextureFormat.R8G8B8A8Unorm, // sRGB maps to the Unorm texture format.
+                Format.R16G16B16A16Float => TextureFormat.R16G16B16A16Float,
+                Format.R16G16B16A16Unorm => TextureFormat.R16G16B16A16Unorm,
+                Format.R16G16B16A16Snorm => TextureFormat.R16G16B16A16Snorm,
+                Format.R16G16B16A16Uint  => TextureFormat.R16G16B16A16Uint,
+                Format.R16G16B16A16Sint  => TextureFormat.R16G16B16A16Sint,
+                Format.R32G32B32A32Float => TextureFormat.R32G32B32A32Float,
+                Format.R32G32B32A32Uint  => TextureFormat.R32G32B32A32Uint,
+                Format.R32G32B32A32Sint  => TextureFormat.R32G32B32A32Sint,
+                Format.R10G10B10A2Unorm  => TextureFormat.R10G10B10A2Unorm,
+                Format.R10G10B10A2Uint   => TextureFormat.R10G10B10A2Uint,
+                Format.R11G11B10Float    => TextureFormat.R11G11B10Float,
+                _                        => TextureFormat.Unknown
+            };
+        }
+
+        /// <summary>
+        /// Queries whether the texture target is a buffer texture.
+        /// </summary>
+        /// <param name="handle">Texture handle</param>
+        /// <returns>True if the texture is a buffer texture, false otherwise</returns>
+        public bool QueryIsTextureBuffer(int handle)
+        {
+            return GetTextureDescriptor(handle).UnpackTextureTarget() == TextureTarget.TextureBuffer;
+        }
+
+        /// <summary>
+        /// Queries whether the texture is a 2D or 2D rectangle target whose descriptor has non-normalized coordinates.
+        /// </summary>
+        /// <param name="handle">Texture handle</param>
+        /// <returns>True if the texture is a rectangle texture, false otherwise</returns>
+        public bool QueryIsTextureRectangle(int handle)
+        {
+            var descriptor = GetTextureDescriptor(handle);
+
+            TextureTarget target = descriptor.UnpackTextureTarget();
+
+            bool is2DTexture = target == TextureTarget.Texture2D ||
+                               target == TextureTarget.Texture2DRect;
+
+            return !descriptor.UnpackTextureCoordNormalized() && is2DTexture;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.OpenGL/Program.cs b/Ryujinx.Graphics.OpenGL/Program.cs
index 17e14df6..d39e181d 100644
--- a/Ryujinx.Graphics.OpenGL/Program.cs
+++ b/Ryujinx.Graphics.OpenGL/Program.cs
@@ -3,6 +3,7 @@ using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.GAL;
 using Ryujinx.Graphics.Shader.CodeGen.Glsl;
 using System;
+using System.Buffers.Binary;
 using System.Collections.Generic;
 using System.Linq;
 
@@ -22,6 +23,8 @@ namespace Ryujinx.Graphics.OpenGL
         {
             Handle = GL.CreateProgram();
 
+            GL.ProgramParameter(Handle, ProgramParameterName.ProgramBinaryRetrievableHint, 1);
+
             for (int index = 0; index < shaders.Length; index++)
             {
                 int shaderHandle = ((Shader)shaders[index]).Handle;
@@ -93,6 +96,27 @@ namespace Ryujinx.Graphics.OpenGL
             ComputeRenderScaleUniform = GL.GetUniformLocation(Handle, "cp_renderScale");
         }
 
+        public Program(ReadOnlySpan<byte> code)
+        {
+            BinaryFormat binaryFormat = (BinaryFormat)BinaryPrimitives.ReadInt32LittleEndian(code.Slice(code.Length - 4, 4)); // The last 4 bytes hold the binary format.
+
+            Handle = GL.CreateProgram();
+
+            unsafe
+            {
+                fixed (byte* ptr = code)
+                {
+                    GL.ProgramBinary(Handle, binaryFormat, (IntPtr)ptr, code.Length - 4); // Everything before the trailing format is the program binary.
+                }
+            }
+
+            CheckProgramLink();
+
+            FragmentIsBgraUniform = GL.GetUniformLocation(Handle, "is_bgra");
+            FragmentRenderScaleUniform = GL.GetUniformLocation(Handle, "fp_renderScale");
+            ComputeRenderScaleUniform = GL.GetUniformLocation(Handle, "cp_renderScale");
+        }
+
         public void Bind()
         {
             GL.UseProgram(Handle);
@@ -113,6 +137,19 @@ namespace Ryujinx.Graphics.OpenGL
             }
         }
 
+        public byte[] GetBinary() // Returns the program binary with its BinaryFormat appended as a 4-byte little-endian trailer.
+        {
+            GL.GetProgram(Handle, (GetProgramParameterName)All.ProgramBinaryLength, out int size); // Query the binary length in bytes.
+
+            byte[] data = new byte[size + 4]; // 4 extra bytes hold the format trailer.
+
+            GL.GetProgramBinary(Handle, size, out _, out BinaryFormat binFormat, data); // The reported written length is discarded.
+
+            BinaryPrimitives.WriteInt32LittleEndian(data.AsSpan().Slice(size, 4), (int)binFormat); // Trailer starts at offset size, right after the binary.
+
+            return data;
+        }
+
         public void Dispose()
         {
             if (Handle != 0)
diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs
index 75bcda12..f66acce3 100644
--- a/Ryujinx.Graphics.OpenGL/Renderer.cs
+++ b/Ryujinx.Graphics.OpenGL/Renderer.cs
@@ -165,5 +165,32 @@ namespace Ryujinx.Graphics.OpenGL
             _window.Dispose();
             _counters.Dispose();
         }
+
+        /// <summary>
+        /// Creates a program from a binary blob previously produced by <see cref="Program.GetBinary"/>.
+        /// </summary>
+        /// <param name="programBinary">Program binary followed by a 4-byte binary format trailer</param>
+        /// <returns>The loaded program, or null if the blob is unusable or the program is not linked</returns>
+        public IProgram LoadProgramBinary(byte[] programBinary)
+        {
+            // The Program constructor reads the binary format from the last 4 bytes of the blob,
+            // so a missing or truncated blob would make it throw instead of
+            // reporting the failure through the null return used below.
+            if (programBinary == null || programBinary.Length < sizeof(int))
+            {
+                return null;
+            }
+
+            Program program = new Program(programBinary);
+
+            if (program.IsLinked)
+            {
+                return program;
+            }
+
+            program.Dispose();
+
+            return null;
+        }
     }
 }
diff --git a/Ryujinx.Graphics.Shader/BufferDescriptor.cs b/Ryujinx.Graphics.Shader/BufferDescriptor.cs
index 99855518..53a4fb16 100644
--- a/Ryujinx.Graphics.Shader/BufferDescriptor.cs
+++ b/Ryujinx.Graphics.Shader/BufferDescriptor.cs
@@ -2,8 +2,8 @@ namespace Ryujinx.Graphics.Shader
 {
     public struct BufferDescriptor
     {
-        public int Binding { get; }
-        public int Slot { get; }
+        public readonly int Binding;
+        public readonly int Slot;
 
         public BufferDescriptor(int binding, int slot)
         {
diff --git a/Ryujinx.Graphics.Shader/Decoders/IOpCodeTexture.cs b/Ryujinx.Graphics.Shader/Decoders/IOpCodeTexture.cs
index 55d1225a..eb835449 100644
--- a/Ryujinx.Graphics.Shader/Decoders/IOpCodeTexture.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/IOpCodeTexture.cs
@@ -12,7 +12,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
 
         int ComponentMask { get; }
 
-        int Immediate { get; }
+        int HandleOffset { get; }
 
         TextureLodMode LodMode { get; }
 
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeImage.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeImage.cs
index 5b2f8063..36f0b164 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeImage.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeImage.cs
@@ -2,7 +2,7 @@ using Ryujinx.Graphics.Shader.Instructions;
 
 namespace Ryujinx.Graphics.Shader.Decoders
 {
-    class OpCodeImage : OpCode
+    class OpCodeImage : OpCodeTextureBase
     {
         public Register Ra { get; }
         public Register Rb { get; }
@@ -15,8 +15,6 @@ namespace Ryujinx.Graphics.Shader.Decoders
 
         public ImageDimensions Dimensions { get; }
 
-        public int Immediate { get; }
-
         public bool UseComponents { get; }
         public bool IsBindless    { get; }
 
@@ -43,7 +41,6 @@ namespace Ryujinx.Graphics.Shader.Decoders
 
             Dimensions = (ImageDimensions)opCode.Extract(33, 3);
 
-            Immediate  =  opCode.Extract(36, 13);
             IsBindless = !opCode.Extract(51);
         }
     }
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTexture.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTexture.cs
index f19f7dad..55e66f41 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeTexture.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTexture.cs
@@ -2,7 +2,7 @@ using Ryujinx.Graphics.Shader.Instructions;
 
 namespace Ryujinx.Graphics.Shader.Decoders
 {
-    class OpCodeTexture : OpCode, IOpCodeTexture
+    class OpCodeTexture : OpCodeTextureBase, IOpCodeTexture
     {
         public Register Rd { get; }
         public Register Ra { get; }
@@ -14,8 +14,6 @@ namespace Ryujinx.Graphics.Shader.Decoders
 
         public int ComponentMask { get; }
 
-        public int Immediate { get; }
-
         public TextureLodMode LodMode { get; protected set; }
 
         public bool HasOffset       { get; protected set; }
@@ -36,8 +34,6 @@ namespace Ryujinx.Graphics.Shader.Decoders
 
             ComponentMask = opCode.Extract(31, 4);
 
-            Immediate = opCode.Extract(36, 13);
-
             LodMode = (TextureLodMode)opCode.Extract(55, 3);
         }
     }
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureBase.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureBase.cs
new file mode 100644
index 00000000..0da1ca76
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureBase.cs
@@ -0,0 +1,14 @@
+using Ryujinx.Graphics.Shader.Instructions;
+
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+    class OpCodeTextureBase : OpCode // Shared base for OpCodeImage, OpCodeTexture and OpCodeTextureScalar; holds the field each of them used to decode on its own.
+    {
+        public int HandleOffset { get; } // Value of opCode.Extract(36, 13); replaces the per-class Immediate property.
+
+        public OpCodeTextureBase(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
+        {
+            HandleOffset = opCode.Extract(36, 13); // NOTE(review): presumably 13 bits starting at bit 36 - confirm against Extract.
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureScalar.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureScalar.cs
index 3ccd185c..be33a6f0 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureScalar.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureScalar.cs
@@ -3,7 +3,7 @@ using Ryujinx.Graphics.Shader.Instructions;
 
 namespace Ryujinx.Graphics.Shader.Decoders
 {
-    class OpCodeTextureScalar : OpCode
+    class OpCodeTextureScalar : OpCodeTextureBase
     {
 #region "Component mask LUT"
         private const int ____ = 0x0;
@@ -33,8 +33,6 @@ namespace Ryujinx.Graphics.Shader.Decoders
         public Register Rb  { get; }
         public Register Rd1 { get; }
 
-        public int Immediate { get; }
-
         public int ComponentMask { get; protected set; }
 
         protected int RawType;
@@ -50,8 +48,6 @@ namespace Ryujinx.Graphics.Shader.Decoders
             Rb  = new Register(opCode.Extract(20, 8), RegisterType.Gpr);
             Rd1 = new Register(opCode.Extract(28, 8), RegisterType.Gpr);
 
-            Immediate = opCode.Extract(36, 13);
-
             int compSel = opCode.Extract(50, 3);
 
             RawType = opCode.Extract(53, 4);
diff --git a/Ryujinx.Graphics.Shader/InputTopology.cs b/Ryujinx.Graphics.Shader/InputTopology.cs
index 3b0dda45..429aa211 100644
--- a/Ryujinx.Graphics.Shader/InputTopology.cs
+++ b/Ryujinx.Graphics.Shader/InputTopology.cs
@@ -1,6 +1,6 @@
 namespace Ryujinx.Graphics.Shader
 {
-    public enum InputTopology
+    public enum InputTopology : byte
     {
         Points,
         Lines,
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs
index 6b439901..a3906294 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs
@@ -73,7 +73,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
 
             Operand[] sources = sourcesList.ToArray();
 
-            int handle = !op.IsBindless ? op.Immediate : 0;
+            int handle = !op.IsBindless ? op.HandleOffset : 0;
 
             TextureFlags flags = op.IsBindless ? TextureFlags.Bindless : TextureFlags.None;
 
@@ -238,7 +238,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
 
                 if (!op.IsBindless)
                 {
-                    format = context.Config.GetTextureFormat(op.Immediate);
+                    format = context.Config.GetTextureFormat(op.HandleOffset);
                 }
             }
             else
@@ -262,7 +262,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
 
             Operand[] sources = sourcesList.ToArray();
 
-            int handle = !op.IsBindless ? op.Immediate : 0;
+            int handle = !op.IsBindless ? op.HandleOffset : 0;
 
             TextureFlags flags = op.IsBindless ? TextureFlags.Bindless : TextureFlags.None;
 
@@ -458,7 +458,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
 
                 flags = ConvertTextureFlags(tldsOp.Target) | TextureFlags.IntCoords;
 
-                if (tldsOp.Target == TexelLoadTarget.Texture1DLodZero && context.Config.GpuAccessor.QueryIsTextureBuffer(tldsOp.Immediate))
+                if (tldsOp.Target == TexelLoadTarget.Texture1DLodZero && context.Config.GpuAccessor.QueryIsTextureBuffer(tldsOp.HandleOffset))
                 {
                     type   = SamplerType.TextureBuffer;
                     flags &= ~TextureFlags.LodLevel;
@@ -607,7 +607,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
                 }
             }
 
-            int handle = op.Immediate;
+            int handle = op.HandleOffset;
 
             for (int compMask = op.ComponentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++)
             {
@@ -756,7 +756,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
                 return Register(rdIndex++, RegisterType.Gpr);
             }
 
-            int handle = op.Immediate;
+            int handle = op.HandleOffset;
 
             for (int compMask = op.ComponentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++)
             {
@@ -870,7 +870,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
                 return Register(rdIndex++, RegisterType.Gpr);
             }
 
-            int handle = !isBindless ? op.Immediate : 0;
+            int handle = !isBindless ? op.HandleOffset : 0;
 
             for (int compMask = op.ComponentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++)
             {
@@ -1019,7 +1019,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
                 return Register(rdIndex++, RegisterType.Gpr);
             }
 
-            int handle = !op.IsBindless ? op.Immediate : 0;
+            int handle = !op.IsBindless ? op.HandleOffset : 0;
 
             for (int compMask = op.ComponentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++)
             {
@@ -1104,7 +1104,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
                 return Register(rdIndex++, RegisterType.Gpr);
             }
 
-            int handle = !bindless ? op.Immediate : 0;
+            int handle = !bindless ? op.HandleOffset : 0;
 
             for (int compMask = op.ComponentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++)
             {
@@ -1181,7 +1181,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
             {
                 // For bindless, we don't have any way to know the texture type,
                 // so we assume it's texture buffer when the sampler type is 1D, since that's more common.
-                bool isTypeBuffer = isBindless || context.Config.GpuAccessor.QueryIsTextureBuffer(op.Immediate);
+                bool isTypeBuffer = isBindless || context.Config.GpuAccessor.QueryIsTextureBuffer(op.HandleOffset);
 
                 if (isTypeBuffer)
                 {
@@ -1269,7 +1269,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
                 return Register(rdIndex++, RegisterType.Gpr);
             }
 
-            int handle = !isBindless ? op.Immediate : 0;
+            int handle = !isBindless ? op.HandleOffset : 0;
 
             for (int compMask = op.ComponentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++)
             {
diff --git a/Ryujinx.Graphics.Shader/ShaderProgram.cs b/Ryujinx.Graphics.Shader/ShaderProgram.cs
index 30f40175..0e037fc9 100644
--- a/Ryujinx.Graphics.Shader/ShaderProgram.cs
+++ b/Ryujinx.Graphics.Shader/ShaderProgram.cs
@@ -4,8 +4,6 @@ namespace Ryujinx.Graphics.Shader
 {
     public class ShaderProgram
     {
-        public ShaderProgramInfo Info { get; }
-
         public ShaderStage Stage { get; }
 
         public string Code { get; private set; }
@@ -13,9 +11,8 @@ namespace Ryujinx.Graphics.Shader
         public int SizeA { get; }
         public int Size { get; }
 
-        internal ShaderProgram(ShaderProgramInfo info, ShaderStage stage, string code, int size, int sizeA)
+        public ShaderProgram(ShaderStage stage, string code, int size, int sizeA)
         {
-            Info  = info;
             Stage = stage;
             Code  = code;
             SizeA = sizeA;
diff --git a/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs b/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs
index d91c9613..2324fac2 100644
--- a/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs
+++ b/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs
@@ -12,7 +12,7 @@ namespace Ryujinx.Graphics.Shader
 
         public bool UsesInstanceId { get; }
 
-        internal ShaderProgramInfo(
+        public ShaderProgramInfo(
             BufferDescriptor[]  cBuffers,
             BufferDescriptor[]  sBuffers,
             TextureDescriptor[] textures,
diff --git a/Ryujinx.Graphics.Shader/ShaderStage.cs b/Ryujinx.Graphics.Shader/ShaderStage.cs
index 30b65348..63e3b068 100644
--- a/Ryujinx.Graphics.Shader/ShaderStage.cs
+++ b/Ryujinx.Graphics.Shader/ShaderStage.cs
@@ -1,12 +1,14 @@
 namespace Ryujinx.Graphics.Shader
 {
-    public enum ShaderStage
+    public enum ShaderStage : byte
     {
         Compute,
         Vertex,
         TessellationControl,
         TessellationEvaluation,
         Geometry,
-        Fragment
+        Fragment,
+
+        Count
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/TextureDescriptor.cs b/Ryujinx.Graphics.Shader/TextureDescriptor.cs
index 95f5e01d..b7b0ae12 100644
--- a/Ryujinx.Graphics.Shader/TextureDescriptor.cs
+++ b/Ryujinx.Graphics.Shader/TextureDescriptor.cs
@@ -2,15 +2,15 @@ namespace Ryujinx.Graphics.Shader
 {
     public struct TextureDescriptor
     {
-        public int Binding { get; }
+        public readonly int Binding;
 
-        public SamplerType Type { get; }
-        public TextureFormat Format { get; }
+        public readonly SamplerType Type;
+        public readonly TextureFormat Format;
 
-        public int CbufSlot { get; }
-        public int HandleIndex { get; }
+        public readonly int CbufSlot;
+        public readonly int HandleIndex;
 
-        public TextureUsageFlags Flags { get; set; }
+        public TextureUsageFlags Flags;
 
         public TextureDescriptor(int binding, SamplerType type, TextureFormat format, int cbufSlot, int handleIndex)
         {
diff --git a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
index 08e7df3b..637ce8fe 100644
--- a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
+++ b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
@@ -1,3 +1,5 @@
+using System.Collections.Generic;
+
 namespace Ryujinx.Graphics.Shader.Translation
 {
     class ShaderConfig
@@ -26,38 +28,42 @@ namespace Ryujinx.Graphics.Shader.Translation
 
         public FeatureFlags UsedFeatures { get; private set; }
 
+        public HashSet<int> TextureHandlesForCache { get; }
+
         public ShaderConfig(IGpuAccessor gpuAccessor, TranslationFlags flags, TranslationCounts counts)
         {
-            Stage             = ShaderStage.Compute;
-            OutputTopology    = OutputTopology.PointList;
-            MaxOutputVertices = 0;
-            LocalMemorySize   = 0;
-            ImapTypes         = null;
-            OmapTargets       = null;
-            OmapSampleMask    = false;
-            OmapDepth         = false;
-            GpuAccessor       = gpuAccessor;
-            Flags             = flags;
-            Size              = 0;
-            UsedFeatures      = FeatureFlags.None;
-            Counts            = counts;
+            Stage                  = ShaderStage.Compute;
+            OutputTopology         = OutputTopology.PointList;
+            MaxOutputVertices      = 0;
+            LocalMemorySize        = 0;
+            ImapTypes              = null;
+            OmapTargets            = null;
+            OmapSampleMask         = false;
+            OmapDepth              = false;
+            GpuAccessor            = gpuAccessor;
+            Flags                  = flags;
+            Size                   = 0;
+            UsedFeatures           = FeatureFlags.None;
+            Counts                 = counts;
+            TextureHandlesForCache = new HashSet<int>();
         }
 
         public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationFlags flags, TranslationCounts counts)
         {
-            Stage             = header.Stage;
-            OutputTopology    = header.OutputTopology;
-            MaxOutputVertices = header.MaxOutputVertexCount;
-            LocalMemorySize   = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize;
-            ImapTypes         = header.ImapTypes;
-            OmapTargets       = header.OmapTargets;
-            OmapSampleMask    = header.OmapSampleMask;
-            OmapDepth         = header.OmapDepth;
-            GpuAccessor       = gpuAccessor;
-            Flags             = flags;
-            Size              = 0;
-            UsedFeatures      = FeatureFlags.None;
-            Counts            = counts;
+            Stage                  = header.Stage;
+            OutputTopology         = header.OutputTopology;
+            MaxOutputVertices      = header.MaxOutputVertexCount;
+            LocalMemorySize        = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize;
+            ImapTypes              = header.ImapTypes;
+            OmapTargets            = header.OmapTargets;
+            OmapSampleMask         = header.OmapSampleMask;
+            OmapDepth              = header.OmapDepth;
+            GpuAccessor            = gpuAccessor;
+            Flags                  = flags;
+            Size                   = 0;
+            UsedFeatures           = FeatureFlags.None;
+            Counts                 = counts;
+            TextureHandlesForCache = new HashSet<int>();
         }
 
         public int GetDepthRegister()
diff --git a/Ryujinx.Graphics.Shader/Translation/Translator.cs b/Ryujinx.Graphics.Shader/Translation/Translator.cs
index 3485b5ed..85a46d19 100644
--- a/Ryujinx.Graphics.Shader/Translation/Translator.cs
+++ b/Ryujinx.Graphics.Shader/Translation/Translator.cs
@@ -3,7 +3,6 @@ using Ryujinx.Graphics.Shader.Decoders;
 using Ryujinx.Graphics.Shader.IntermediateRepresentation;
 using Ryujinx.Graphics.Shader.StructuredIr;
 using Ryujinx.Graphics.Shader.Translation.Optimizations;
-using System;
 using System.Collections.Generic;
 
 using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
@@ -14,7 +13,7 @@ namespace Ryujinx.Graphics.Shader.Translation
     {
         private const int HeaderSize = 0x50;
 
-        private struct FunctionCode
+        internal struct FunctionCode
         {
             public Operation[] Code { get; }
 
@@ -24,7 +23,7 @@ namespace Ryujinx.Graphics.Shader.Translation
             }
         }
 
-        public static ShaderProgram Translate(
+        public static TranslatorContext CreateContext(
             ulong address,
             IGpuAccessor gpuAccessor,
             TranslationFlags flags,
@@ -32,10 +31,12 @@ namespace Ryujinx.Graphics.Shader.Translation
         {
             counts ??= new TranslationCounts();
 
-            return Translate(DecodeShader(address, gpuAccessor, flags, counts, out ShaderConfig config), config);
+            Block[][] cfg = DecodeShader(address, gpuAccessor, flags, counts, out ShaderConfig config);
+
+            return new TranslatorContext(address, cfg, config);
         }
 
-        public static ShaderProgram Translate(
+        public static TranslatorContext CreateContext(
             ulong addressA,
             ulong addressB,
             IGpuAccessor gpuAccessor,
@@ -44,15 +45,13 @@ namespace Ryujinx.Graphics.Shader.Translation
         {
             counts ??= new TranslationCounts();
 
-            FunctionCode[] funcA = DecodeShader(addressA, gpuAccessor, flags | TranslationFlags.VertexA, counts, out ShaderConfig configA);
-            FunctionCode[] funcB = DecodeShader(addressB, gpuAccessor, flags, counts, out ShaderConfig config);
+            Block[][] cfgA = DecodeShader(addressA, gpuAccessor, flags | TranslationFlags.VertexA, counts, out ShaderConfig configA);
+            Block[][] cfgB = DecodeShader(addressB, gpuAccessor, flags, counts, out ShaderConfig configB);
 
-            config.SetUsedFeature(configA.UsedFeatures);
-
-            return Translate(Combine(funcA, funcB), config, configA.Size);
+            return new TranslatorContext(addressA, addressB, cfgA, cfgB, configA, configB);
         }
 
-        private static ShaderProgram Translate(FunctionCode[] functions, ShaderConfig config, int sizeA = 0)
+        internal static ShaderProgram Translate(FunctionCode[] functions, ShaderConfig config, out ShaderProgramInfo shaderProgramInfo, int sizeA = 0)
         {
             var cfgs = new ControlFlowGraph[functions.Length];
             var frus = new RegisterUsage.FunctionRegisterUsage[functions.Length];
@@ -106,7 +105,7 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             GlslProgram program = GlslGenerator.Generate(sInfo, config);
 
-            ShaderProgramInfo spInfo = new ShaderProgramInfo(
+            shaderProgramInfo = new ShaderProgramInfo(
                 program.CBufferDescriptors,
                 program.SBufferDescriptors,
                 program.TextureDescriptors,
@@ -115,10 +114,10 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             string glslCode = program.Code;
 
-            return new ShaderProgram(spInfo, config.Stage, glslCode, config.Size, sizeA);
+            return new ShaderProgram(config.Stage, glslCode, config.Size, sizeA);
         }
 
-        private static FunctionCode[] DecodeShader(
+        private static Block[][] DecodeShader(
             ulong address,
             IGpuAccessor gpuAccessor,
             TranslationFlags flags,
@@ -126,6 +125,7 @@ namespace Ryujinx.Graphics.Shader.Translation
             out ShaderConfig config)
         {
             Block[][] cfg;
+            ulong maxEndAddress = 0;
 
             if ((flags & TranslationFlags.Compute) != 0)
             {
@@ -140,13 +140,34 @@ namespace Ryujinx.Graphics.Shader.Translation
                 cfg = Decoder.Decode(gpuAccessor, address + HeaderSize);
             }
 
-            if (cfg == null)
+            for (int funcIndex = 0; funcIndex < cfg.Length; funcIndex++)
             {
-                gpuAccessor.Log("Invalid branch detected, failed to build CFG.");
+                for (int blkIndex = 0; blkIndex < cfg[funcIndex].Length; blkIndex++)
+                {
+                    Block block = cfg[funcIndex][blkIndex];
 
-                return Array.Empty<FunctionCode>();
+                    if (maxEndAddress < block.EndAddress)
+                    {
+                        maxEndAddress = block.EndAddress;
+                    }
+
+                    for (int index = 0; index < block.OpCodes.Count; index++)
+                    {
+                        if (block.OpCodes[index] is OpCodeTextureBase texture)
+                        {
+                            config.TextureHandlesForCache.Add(texture.HandleOffset);
+                        }
+                    }
+                }
             }
 
+            config.SizeAdd((int)maxEndAddress + (flags.HasFlag(TranslationFlags.Compute) ? 0 : HeaderSize));
+
+            return cfg;
+        }
+
+        internal static FunctionCode[] EmitShader(Block[][] cfg, ShaderConfig config)
+        {
             Dictionary<ulong, int> funcIds = new Dictionary<ulong, int>();
 
             for (int funcIndex = 0; funcIndex < cfg.Length; funcIndex++)
@@ -156,8 +177,6 @@ namespace Ryujinx.Graphics.Shader.Translation
 
             List<FunctionCode> funcs = new List<FunctionCode>();
 
-            ulong maxEndAddress = 0;
-
             for (int funcIndex = 0; funcIndex < cfg.Length; funcIndex++)
             {
                 EmitterContext context = new EmitterContext(config, funcIndex != 0, funcIds);
@@ -166,11 +185,6 @@ namespace Ryujinx.Graphics.Shader.Translation
                 {
                     Block block = cfg[funcIndex][blkIndex];
 
-                    if (maxEndAddress < block.EndAddress)
-                    {
-                        maxEndAddress = block.EndAddress;
-                    }
-
                     context.CurrBlock = block;
 
                     context.MarkLabel(context.GetLabel(block.Address));
@@ -181,12 +195,10 @@ namespace Ryujinx.Graphics.Shader.Translation
                 funcs.Add(new FunctionCode(context.GetOperations()));
             }
 
-            config.SizeAdd((int)maxEndAddress + (flags.HasFlag(TranslationFlags.Compute) ? 0 : HeaderSize));
-
             return funcs.ToArray();
         }
 
-        internal static void EmitOps(EmitterContext context, Block block)
+        private static void EmitOps(EmitterContext context, Block block)
         {
             for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)
             {
@@ -267,101 +279,5 @@ namespace Ryujinx.Graphics.Shader.Translation
                 }
             }
         }
-
-        private static FunctionCode[] Combine(FunctionCode[] a, FunctionCode[] b)
-        {
-            // Here we combine two shaders.
-            // For shader A:
-            // - All user attribute stores on shader A are turned into copies to a
-            // temporary variable. It's assumed that shader B will consume them.
-            // - All return instructions are turned into branch instructions, the
-            // branch target being the start of the shader B code.
-            // For shader B:
-            // - All user attribute loads on shader B are turned into copies from a
-            // temporary variable, as long that attribute is written by shader A.
-            FunctionCode[] output = new FunctionCode[a.Length + b.Length - 1];
-
-            List<Operation> ops = new List<Operation>(a.Length + b.Length);
-
-            Operand[] temps = new Operand[AttributeConsts.UserAttributesCount * 4];
-
-            Operand lblB = Label();
-
-            for (int index = 0; index < a[0].Code.Length; index++)
-            {
-                Operation operation = a[0].Code[index];
-
-                if (IsUserAttribute(operation.Dest))
-                {
-                    int tIndex = (operation.Dest.Value - AttributeConsts.UserAttributeBase) / 4;
-
-                    Operand temp = temps[tIndex];
-
-                    if (temp == null)
-                    {
-                        temp = Local();
-
-                        temps[tIndex] = temp;
-                    }
-
-                    operation.Dest = temp;
-                }
-
-                if (operation.Inst == Instruction.Return)
-                {
-                    ops.Add(new Operation(Instruction.Branch, lblB));
-                }
-                else
-                {
-                    ops.Add(operation);
-                }
-            }
-
-            ops.Add(new Operation(Instruction.MarkLabel, lblB));
-
-            for (int index = 0; index < b[0].Code.Length; index++)
-            {
-                Operation operation = b[0].Code[index];
-
-                for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
-                {
-                    Operand src = operation.GetSource(srcIndex);
-
-                    if (IsUserAttribute(src))
-                    {
-                        Operand temp = temps[(src.Value - AttributeConsts.UserAttributeBase) / 4];
-
-                        if (temp != null)
-                        {
-                            operation.SetSource(srcIndex, temp);
-                        }
-                    }
-                }
-
-                ops.Add(operation);
-            }
-
-            output[0] = new FunctionCode(ops.ToArray());
-
-            for (int i = 1; i < a.Length; i++)
-            {
-                output[i] = a[i];
-            }
-
-            for (int i = 1; i < b.Length; i++)
-            {
-                output[a.Length + i - 1] = b[i];
-            }
-
-            return output;
-        }
-
-        private static bool IsUserAttribute(Operand operand)
-        {
-            return operand != null &&
-                   operand.Type == OperandType.Attribute &&
-                   operand.Value >= AttributeConsts.UserAttributeBase &&
-                   operand.Value <  AttributeConsts.UserAttributeEnd;
-        }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
new file mode 100644
index 00000000..3092e077
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
@@ -0,0 +1,223 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+using static Ryujinx.Graphics.Shader.Translation.Translator;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+    /// <summary>
+    /// Holds the control flow graph and configuration of a shader, optionally paired with a second
+    /// shader ("A") whose code is merged in front of the main one when <see cref="Translate"/> is called.
+    /// </summary>
+    public class TranslatorContext
+    {
+        private readonly Block[][] _cfg;
+        private readonly Block[][] _cfgA;
+        private readonly ShaderConfig _config;
+        private readonly ShaderConfig _configA;
+
+        /// <summary>
+        /// Address of the main shader.
+        /// </summary>
+        public ulong Address { get; }
+
+        /// <summary>
+        /// Address of shader A, or 0 when the context holds a single shader.
+        /// </summary>
+        public ulong AddressA { get; }
+
+        /// <summary>
+        /// Stage of the main shader, as reported by its configuration.
+        /// </summary>
+        public ShaderStage Stage => _config.Stage;
+
+        /// <summary>
+        /// Size of the main shader, as reported by its configuration.
+        /// </summary>
+        public int Size => _config.Size;
+
+        /// <summary>
+        /// Size of shader A, or 0 when the context holds a single shader.
+        /// </summary>
+        public int SizeA => _configA != null ? _configA.Size : 0;
+
+        /// <summary>
+        /// Texture handle set exposed by the main shader configuration.
+        /// </summary>
+        public HashSet<int> TextureHandlesForCache => _config.TextureHandlesForCache;
+
+        /// <summary>
+        /// GPU accessor of the main shader configuration.
+        /// </summary>
+        public IGpuAccessor GpuAccessor => _config.GpuAccessor;
+
+        /// <summary>
+        /// Creates a context for a single shader.
+        /// </summary>
+        /// <param name="address">Address of the shader</param>
+        /// <param name="cfg">Control flow graph of the shader</param>
+        /// <param name="config">Configuration of the shader</param>
+        internal TranslatorContext(ulong address, Block[][] cfg, ShaderConfig config)
+        {
+            Address    = address;
+            AddressA   = 0;
+            _config    = config;
+            _configA   = null;
+            _cfg       = cfg;
+            _cfgA      = null;
+        }
+
+        /// <summary>
+        /// Creates a context for a pair of shaders, A and B, that are combined into one program.
+        /// Shader B becomes the main shader of the context.
+        /// </summary>
+        /// <param name="addressA">Address of shader A</param>
+        /// <param name="addressB">Address of shader B</param>
+        /// <param name="cfgA">Control flow graph of shader A</param>
+        /// <param name="cfgB">Control flow graph of shader B</param>
+        /// <param name="configA">Configuration of shader A</param>
+        /// <param name="configB">Configuration of shader B</param>
+        internal TranslatorContext(ulong addressA, ulong addressB, Block[][] cfgA, Block[][] cfgB, ShaderConfig configA, ShaderConfig configB)
+        {
+            Address  = addressB;
+            AddressA = addressA;
+            _config  = configB;
+            _configA = configA;
+            _cfg     = cfgB;
+            _cfgA    = cfgA;
+        }
+
+        /// <summary>
+        /// Checks if an operand is an attribute in the user attribute range.
+        /// </summary>
+        /// <param name="operand">Operand to check, may be null</param>
+        /// <returns>True if the operand is a user attribute, false otherwise</returns>
+        private static bool IsUserAttribute(Operand operand)
+        {
+            return operand != null &&
+                   operand.Type == OperandType.Attribute &&
+                   operand.Value >= AttributeConsts.UserAttributeBase &&
+                   operand.Value < AttributeConsts.UserAttributeEnd;
+        }
+
+        /// <summary>
+        /// Combines the code of two shaders into a single one, running shader A first and then shader B.
+        /// </summary>
+        /// <param name="a">Functions of shader A, element 0 being the function whose code is merged</param>
+        /// <param name="b">Functions of shader B, element 0 being the function whose code is merged</param>
+        /// <returns>Merged function followed by the remaining functions of A and then of B</returns>
+        private static FunctionCode[] Combine(FunctionCode[] a, FunctionCode[] b)
+        {
+            // Here we combine two shaders.
+            // For shader A:
+            // - All user attribute stores on shader A are turned into copies to a
+            // temporary variable. It's assumed that shader B will consume them.
+            // - All return instructions are turned into branch instructions, the
+            // branch target being the start of the shader B code.
+            // For shader B:
+            // - All user attribute loads on shader B are turned into copies from a
+            // temporary variable, as long as that attribute is written by shader A.
+            FunctionCode[] output = new FunctionCode[a.Length + b.Length - 1];
+
+            // Every operation of A and B is emitted exactly once (returns on A are replaced
+            // one for one by branches), plus the label that marks the start of B.
+            List<Operation> ops = new List<Operation>(a[0].Code.Length + b[0].Code.Length + 1);
+
+            Operand[] temps = new Operand[AttributeConsts.UserAttributesCount * 4];
+
+            Operand lblB = Label();
+
+            for (int index = 0; index < a[0].Code.Length; index++)
+            {
+                Operation operation = a[0].Code[index];
+
+                if (IsUserAttribute(operation.Dest))
+                {
+                    int tIndex = (operation.Dest.Value - AttributeConsts.UserAttributeBase) / 4;
+
+                    Operand temp = temps[tIndex];
+
+                    if (temp == null)
+                    {
+                        temp = Local();
+
+                        temps[tIndex] = temp;
+                    }
+
+                    operation.Dest = temp;
+                }
+
+                if (operation.Inst == Instruction.Return)
+                {
+                    ops.Add(new Operation(Instruction.Branch, lblB));
+                }
+                else
+                {
+                    ops.Add(operation);
+                }
+            }
+
+            ops.Add(new Operation(Instruction.MarkLabel, lblB));
+
+            for (int index = 0; index < b[0].Code.Length; index++)
+            {
+                Operation operation = b[0].Code[index];
+
+                for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+                {
+                    Operand src = operation.GetSource(srcIndex);
+
+                    if (IsUserAttribute(src))
+                    {
+                        Operand temp = temps[(src.Value - AttributeConsts.UserAttributeBase) / 4];
+
+                        if (temp != null)
+                        {
+                            operation.SetSource(srcIndex, temp);
+                        }
+                    }
+                }
+
+                ops.Add(operation);
+            }
+
+            output[0] = new FunctionCode(ops.ToArray());
+
+            for (int i = 1; i < a.Length; i++)
+            {
+                output[i] = a[i];
+            }
+
+            for (int i = 1; i < b.Length; i++)
+            {
+                output[a.Length + i - 1] = b[i];
+            }
+
+            return output;
+        }
+
+        /// <summary>
+        /// Emits the shader code (combining shader A with the main shader when both are present)
+        /// and translates it into a shader program.
+        /// </summary>
+        /// <param name="shaderProgramInfo">Information about the translated program</param>
+        /// <returns>Translated shader program</returns>
+        public ShaderProgram Translate(out ShaderProgramInfo shaderProgramInfo)
+        {
+            FunctionCode[] code = EmitShader(_cfg, _config);
+
+            if (_configA != null)
+            {
+                FunctionCode[] codeA = EmitShader(_cfgA, _configA);
+
+                _config.SetUsedFeature(_configA.UsedFeatures);
+
+                code = Combine(codeA, code);
+            }
+
+            return Translator.Translate(code, _config, out shaderProgramInfo, SizeA);
+        }
+    }
+}
diff --git a/Ryujinx.HLE/HOS/ApplicationLoader.cs b/Ryujinx.HLE/HOS/ApplicationLoader.cs
index 46b62178..96fb3a1f 100644
--- a/Ryujinx.HLE/HOS/ApplicationLoader.cs
+++ b/Ryujinx.HLE/HOS/ApplicationLoader.cs
@@ -498,8 +498,13 @@ namespace Ryujinx.HLE.HOS
                 Logger.Warning?.Print(LogClass.Ptc, $"Detected exefs modifications. PPTC disabled.");
             }
 
+            Graphics.Gpu.GraphicsConfig.TitleId = TitleIdText;
+            _device.Gpu.HostInitalized.Set();
+
             Ptc.Initialize(TitleIdText, DisplayVersion, _device.System.EnablePtc && !modified);
 
+            _device.Gpu.ReadyEvent.WaitOne();
+
             ProgramLoader.LoadNsos(_device.System.KernelContext, metaData, executables: programs);
         }
 
@@ -595,6 +600,12 @@ namespace Ryujinx.HLE.HOS
             TitleId      = metaData.Aci0.TitleId;
             TitleIs64Bit = metaData.Is64Bit;
 
+            // Explicitly set the title ID to null to disable the shader cache
+            Graphics.Gpu.GraphicsConfig.TitleId = null;
+
+            _device.Gpu.HostInitalized.Set();
+            _device.Gpu.ReadyEvent.WaitOne();
+
             ProgramLoader.LoadNsos(_device.System.KernelContext, metaData, executables: executable);
         }
 
diff --git a/Ryujinx.ShaderTools/Program.cs b/Ryujinx.ShaderTools/Program.cs
index 567083e4..39b9425c 100644
--- a/Ryujinx.ShaderTools/Program.cs
+++ b/Ryujinx.ShaderTools/Program.cs
@@ -36,7 +36,7 @@ namespace Ryujinx.ShaderTools
 
                 byte[] data = File.ReadAllBytes(args[^1]);
 
-                string code = Translator.Translate(0, new GpuAccessor(data), flags).Code;
+                string code = Translator.CreateContext(0, new GpuAccessor(data), flags).Translate(out _).Code;
 
                 Console.WriteLine(code);
             }
diff --git a/Ryujinx/Config.json b/Ryujinx/Config.json
index fdbd9c1a..6c6f9bef 100644
--- a/Ryujinx/Config.json
+++ b/Ryujinx/Config.json
@@ -21,6 +21,7 @@
   "enable_discord_integration": true,
   "check_updates_on_start": true,
   "enable_vsync": true,
+  "enable_shader_cache": true,
   "enable_multicore_scheduling": true,
   "enable_ptc": false,
   "enable_fs_integrity_checks": true,
diff --git a/Ryujinx/Ui/GLRenderer.cs b/Ryujinx/Ui/GLRenderer.cs
index b635ad1c..0e5ef8f2 100644
--- a/Ryujinx/Ui/GLRenderer.cs
+++ b/Ryujinx/Ui/GLRenderer.cs
@@ -199,19 +199,6 @@ namespace Ryujinx.Ui
             Gtk.Application.Invoke(delegate
             {
                 parent.Present();
-
-                string titleNameSection = string.IsNullOrWhiteSpace(_device.Application.TitleName) ? string.Empty
-                    : $" - {_device.Application.TitleName}";
-
-                string titleVersionSection = string.IsNullOrWhiteSpace(_device.Application.DisplayVersion) ? string.Empty
-                    : $" v{_device.Application.DisplayVersion}";
-
-                string titleIdSection = string.IsNullOrWhiteSpace(_device.Application.TitleIdText) ? string.Empty
-                    : $" ({_device.Application.TitleIdText.ToUpper()})";
-
-                string titleArchSection = _device.Application.TitleIs64Bit ? " (64-bit)" : " (32-bit)";
-
-                parent.Title = $"Ryujinx {Program.Version}{titleNameSection}{titleVersionSection}{titleIdSection}{titleArchSection}";
             });
 
             Thread renderLoopThread = new Thread(Render)
@@ -313,7 +300,7 @@ namespace Ryujinx.Ui
         {
             if (!(_device.Gpu.Renderer is Renderer))
             {
-                throw new NotSupportedException($"GPU renderer must be an OpenGL renderer when using GLRenderer!");
+                throw new NotSupportedException($"GPU renderer must be an OpenGL renderer when using {typeof(Renderer).Name}!");
             }
 
             _renderer = (Renderer)_device.Gpu.Renderer;
@@ -327,7 +314,7 @@ namespace Ryujinx.Ui
             parent.Present();
             GraphicsContext.MakeCurrent(WindowInfo);
 
-            _renderer.Initialize(_glLogLevel);
+            _device.Gpu.Initialize(_glLogLevel);
 
             // Make sure the first frame is not transparent.
             GL.ClearColor(OpenTK.Color.Black);
diff --git a/Ryujinx/Ui/GameTableContextMenu.cs b/Ryujinx/Ui/GameTableContextMenu.cs
index 61e6a80c..58c40791 100644
--- a/Ryujinx/Ui/GameTableContextMenu.cs
+++ b/Ryujinx/Ui/GameTableContextMenu.cs
@@ -111,22 +111,34 @@ namespace Ryujinx.Ui
 
             MenuItem managePtcMenu = new MenuItem("Cache Management");
 
-            MenuItem purgePtcCache = new MenuItem("Purge PPTC cache")
+            MenuItem purgePtcCache = new MenuItem("Purge PPTC Cache")
             {
                 TooltipText = "Delete the Application's PPTC cache."
             };
-            
-            MenuItem openPtcDir = new MenuItem("Open PPTC directory")
+
+            MenuItem purgeShaderCache = new MenuItem("Purge Shader Cache")
             {
-                TooltipText = "Open the directory which contains Application's PPTC cache."
+                TooltipText = "Delete the Application's shader cache."
             };
+
+            MenuItem openPtcDir = new MenuItem("Open PPTC Directory")
+            {
+                TooltipText = "Open the directory which contains the Application's PPTC cache."
+            };
+
+            MenuItem openShaderCacheDir = new MenuItem("Open Shader Cache Directory")
+            {
+                TooltipText = "Open the directory which contains the Application's shader cache."
+            };
+
+            Menu manageSubMenu = new Menu();
             
-            Menu managePtcSubMenu = new Menu();
+            manageSubMenu.Append(purgePtcCache);
+            manageSubMenu.Append(purgeShaderCache);
+            manageSubMenu.Append(openPtcDir);
+            manageSubMenu.Append(openShaderCacheDir);
             
-            managePtcSubMenu.Append(purgePtcCache);
-            managePtcSubMenu.Append(openPtcDir);
-            
-            managePtcMenu.Submenu = managePtcSubMenu;
+            managePtcMenu.Submenu = manageSubMenu;
 
             openSaveUserDir.Activated    += OpenSaveUserDir_Clicked;
             openSaveDeviceDir.Activated  += OpenSaveDeviceDir_Clicked;
@@ -138,8 +150,10 @@ namespace Ryujinx.Ui
             extractExeFs.Activated       += ExtractExeFs_Clicked;
             extractLogo.Activated        += ExtractLogo_Clicked;
             purgePtcCache.Activated      += PurgePtcCache_Clicked;
+            purgeShaderCache.Activated   += PurgeShaderCache_Clicked;
             openPtcDir.Activated         += OpenPtcDir_Clicked;
-            
+            openShaderCacheDir.Activated += OpenShaderCacheDir_Clicked;
+
             this.Add(openSaveUserDir);
             this.Add(openSaveDeviceDir);
             this.Add(openSaveBcatDir);
@@ -640,6 +654,24 @@ namespace Ryujinx.Ui
                 Verb            = "open"
             });
         }
+
+        private void OpenShaderCacheDir_Clicked(object sender, EventArgs args)
+        {
+            // Column 2 of the game table is split on "\n"; the second part is used as the title ID.
+            string[] tableEntry = _gameTableStore.GetValue(_rowIter, 2).ToString().Split("\n");
+            string   cachePath  = System.IO.Path.Combine(AppDataManager.GamesDirPath, tableEntry[1].ToLower(), "cache", "shader");
+
+            // CreateDirectory does nothing when the directory already exists.
+            Directory.CreateDirectory(cachePath);
+
+            ProcessStartInfo startInfo = new ProcessStartInfo
+            {
+                FileName        = cachePath,
+                UseShellExecute = true,
+                Verb            = "open"
+            };
+            Process.Start(startInfo);
+        }
         
         private void PurgePtcCache_Clicked(object sender, EventArgs args)
         {
@@ -678,5 +710,41 @@ namespace Ryujinx.Ui
 
             warningDialog.Dispose();
         }
+
+        private void PurgeShaderCache_Clicked(object sender, EventArgs args)
+        {
+            // Asks for confirmation, then deletes every sub-directory of the selected game's shader cache.
+            string[] tableEntry = _gameTableStore.GetValue(_rowIter, 2).ToString().Split("\n");
+            DirectoryInfo shaderCacheDir = new DirectoryInfo(System.IO.Path.Combine(AppDataManager.GamesDirPath, tableEntry[1].ToLower(), "cache", "shader"));
+
+            // List the cache first: with nothing to purge (or a failing listing) no dialog is ever created.
+            if (!shaderCacheDir.Exists) { return; }
+
+            List<DirectoryInfo> cacheDirectory = new List<DirectoryInfo>(shaderCacheDir.EnumerateDirectories("*"));
+
+            if (cacheDirectory.Count == 0) { return; }
+
+            using (MessageDialog warningDialog = new MessageDialog(null, DialogFlags.Modal, MessageType.Warning, ButtonsType.YesNo, null)
+            {
+                Title          = "Ryujinx - Warning",
+                Text           = $"You are about to delete the shader cache for '{tableEntry[0]}'. Are you sure you want to proceed?",
+                WindowPosition = WindowPosition.Center
+            })
+            {
+                if (warningDialog.Run() != (int)ResponseType.Yes) { return; }
+
+                foreach (DirectoryInfo directory in cacheDirectory)
+                {
+                    try
+                    {
+                        directory.Delete(true);
+                    }
+                    catch (Exception e)
+                    {
+                        Logger.Error?.Print(LogClass.Application, $"Error purging shader cache {directory.Name}: {e}");
+                    }
+                }
+            }
+        }
     }
 }
diff --git a/Ryujinx/Ui/MainWindow.cs b/Ryujinx/Ui/MainWindow.cs
index 10042143..17eaea74 100644
--- a/Ryujinx/Ui/MainWindow.cs
+++ b/Ryujinx/Ui/MainWindow.cs
@@ -39,6 +39,7 @@ namespace Ryujinx.Ui
         public static GlRenderer GlWidget => _glWidget;
 
         private static AutoResetEvent _deviceExitStatus = new AutoResetEvent(false);
+        private static AutoResetEvent _widgetInitEvent = new AutoResetEvent(false);
 
         private static ListStore _tableStore;
 
@@ -433,6 +434,30 @@ namespace Ryujinx.Ui
                     }
                 }
 
+                _widgetInitEvent.Reset();
+
+#if MACOS_BUILD
+                CreateGameWindow(device);
+#else
+                Thread windowThread = new Thread(() =>
+                {
+                    CreateGameWindow(device);
+                })
+                {
+                    Name = "GUI.WindowThread"
+                };
+
+                windowThread.Start();
+#endif
+
+                _widgetInitEvent.WaitOne();
+
+                // Make sure the widget gets initialized by forcing an update of GTK
+                while (Application.EventsPending())
+                {
+                    Application.RunIteration();
+                }
+
                 Logger.Notice.Print(LogClass.Application, $"Using Firmware Version: {firmwareVersion?.VersionString}");
 
                 if (Directory.Exists(path))
@@ -493,25 +518,24 @@ namespace Ryujinx.Ui
                     return;
                 }
 
+                string titleNameSection = string.IsNullOrWhiteSpace(device.Application.TitleName) ? string.Empty
+                    : $" - {device.Application.TitleName}";
+
+                string titleVersionSection = string.IsNullOrWhiteSpace(device.Application.DisplayVersion) ? string.Empty
+                    : $" v{device.Application.DisplayVersion}";
+
+                string titleIdSection = string.IsNullOrWhiteSpace(device.Application.TitleIdText) ? string.Empty
+                    : $" ({device.Application.TitleIdText.ToUpper()})";
+
+                string titleArchSection = device.Application.TitleIs64Bit ? " (64-bit)" : " (32-bit)";
+
+                Title = $"Ryujinx {Program.Version}{titleNameSection}{titleVersionSection}{titleIdSection}{titleArchSection}";
+
                 _emulationContext = device;
                 _gamePath = path;
 
                 _deviceExitStatus.Reset();
 
-#if MACOS_BUILD
-                CreateGameWindow(device);
-#else
-                Thread windowThread = new Thread(() =>
-                {
-                    CreateGameWindow(device);
-                })
-                {
-                    Name = "GUI.WindowThread"
-                };
-
-                windowThread.Start();
-#endif
-
                 _gameLoaded              = true;
                 _stopEmulation.Sensitive = true;
 
@@ -534,7 +558,7 @@ namespace Ryujinx.Ui
                 _windowsMultimediaTimerResolution = new WindowsMultimediaTimerResolution(1);
             }
 
-            _glWidget = new GlRenderer(_emulationContext, ConfigurationState.Instance.Logger.GraphicsDebugLevel);
+            _glWidget = new GlRenderer(device, ConfigurationState.Instance.Logger.GraphicsDebugLevel);
 
             Application.Invoke(delegate
             {
@@ -551,6 +575,8 @@ namespace Ryujinx.Ui
                 }
             });
 
+            _widgetInitEvent.Set();
+
             _glWidget.WaitEvent.WaitOne();
 
             _glWidget.Start();
@@ -658,6 +684,7 @@ namespace Ryujinx.Ui
             Graphics.Gpu.GraphicsConfig.ResScale = (resScale == -1) ? resScaleCustom : resScale;
             Graphics.Gpu.GraphicsConfig.MaxAnisotropy = ConfigurationState.Instance.Graphics.MaxAnisotropy;
             Graphics.Gpu.GraphicsConfig.ShadersDumpPath = ConfigurationState.Instance.Graphics.ShadersDumpPath;
+            Graphics.Gpu.GraphicsConfig.EnableShaderCache = ConfigurationState.Instance.Graphics.EnableShaderCache;
         }
 
         public static void SaveConfig()
diff --git a/Ryujinx/Ui/SettingsWindow.cs b/Ryujinx/Ui/SettingsWindow.cs
index bd4cbbca..4646df06 100644
--- a/Ryujinx/Ui/SettingsWindow.cs
+++ b/Ryujinx/Ui/SettingsWindow.cs
@@ -42,6 +42,7 @@ namespace Ryujinx.Ui
         [GUI] CheckButton     _discordToggle;
         [GUI] CheckButton     _checkUpdatesToggle;
         [GUI] CheckButton     _vSyncToggle;
+        [GUI] CheckButton     _shaderCacheToggle;
         [GUI] CheckButton     _multiSchedToggle;
         [GUI] CheckButton     _ptcToggle;
         [GUI] CheckButton     _fsicToggle;
@@ -182,6 +183,11 @@ namespace Ryujinx.Ui
                 _vSyncToggle.Click();
             }
 
+            if (ConfigurationState.Instance.Graphics.EnableShaderCache)
+            {
+                _shaderCacheToggle.Click();
+            }
+
             if (ConfigurationState.Instance.System.EnableMulticoreScheduling)
             {
                 _multiSchedToggle.Click();
@@ -528,6 +534,7 @@ namespace Ryujinx.Ui
             ConfigurationState.Instance.EnableDiscordIntegration.Value         = _discordToggle.Active;
             ConfigurationState.Instance.CheckUpdatesOnStart.Value              = _checkUpdatesToggle.Active;
             ConfigurationState.Instance.Graphics.EnableVsync.Value             = _vSyncToggle.Active;
+            ConfigurationState.Instance.Graphics.EnableShaderCache.Value       = _shaderCacheToggle.Active;
             ConfigurationState.Instance.System.EnableMulticoreScheduling.Value = _multiSchedToggle.Active;
             ConfigurationState.Instance.System.EnablePtc.Value                 = _ptcToggle.Active;
             ConfigurationState.Instance.System.EnableFsIntegrityChecks.Value   = _fsicToggle.Active;
diff --git a/Ryujinx/Ui/SettingsWindow.glade b/Ryujinx/Ui/SettingsWindow.glade
index 9a51ba2b..ef2262df 100644
--- a/Ryujinx/Ui/SettingsWindow.glade
+++ b/Ryujinx/Ui/SettingsWindow.glade
@@ -1701,6 +1701,24 @@
                                 <property name="margin_left">10</property>
                                 <property name="margin_right">10</property>
                                 <property name="orientation">vertical</property>
+                                <child>
+                                  <object class="GtkCheckButton" id="_shaderCacheToggle">
+                                    <property name="label" translatable="yes">Enable Shader Cache</property>
+                                    <property name="visible">True</property>
+                                    <property name="can_focus">True</property>
+                                    <property name="receives_default">False</property>
+                                    <property name="tooltip_text" translatable="yes">Enables or disables Shader Cache</property>
+                                    <property name="halign">start</property>
+                                    <property name="margin_top">5</property>
+                                    <property name="margin_bottom">5</property>
+                                    <property name="draw_indicator">True</property>
+                                  </object>
+                                  <packing>
+                                    <property name="expand">False</property>
+                                    <property name="fill">True</property>
+                                    <property name="position">0</property>
+                                  </packing>
+                                </child>
                                 <child>
                                   <object class="GtkBox">
                                     <property name="visible">True</property>
@@ -1762,7 +1780,7 @@
                                     <property name="expand">False</property>
                                     <property name="fill">True</property>
                                     <property name="padding">5</property>
-                                    <property name="position">0</property>
+                                    <property name="position">1</property>
                                   </packing>
                                 </child>
                                 <child>
@@ -1817,7 +1835,7 @@
                               <packing>
                                 <property name="expand">False</property>
                                 <property name="fill">True</property>
-                                <property name="position">1</property>
+                                <property name="position">2</property>
                               </packing>
                             </child>
                           </object>