From 9eaa683ae492413acba6d12d95a151ae7d3b124b Mon Sep 17 00:00:00 2001 From: Gabriel A Date: Mon, 21 Aug 2023 19:39:25 -0300 Subject: [PATCH] Add spin lock to prevent waiting for fences on multiple threads at once on Adreno Support ballot operations with divergent control flow on Adreno Extend Adreno binding workaround to buffer textures --- src/Ryujinx.Graphics.GAL/Capabilities.cs | 3 ++ .../Shader/GpuAccessorBase.cs | 2 + src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs | 1 + .../CodeGen/Spirv/Instructions.cs | 40 ++++++++++++++++--- .../CodeGen/Spirv/SpirvGenerator.cs | 8 +++- src/Ryujinx.Graphics.Shader/IGpuAccessor.cs | 9 +++++ .../StructuredIr/HelperFunctionsMask.cs | 1 + .../StructuredIr/StructuredProgram.cs | 3 ++ .../Translation/HostCapabilities.cs | 3 ++ .../Translation/TranslatorContext.cs | 1 + .../ShaderCollection.cs | 12 ++++-- src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs | 1 + 12 files changed, 75 insertions(+), 9 deletions(-) diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs index a5c6eb5c8..2334bf0ca 100644 --- a/src/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs @@ -40,6 +40,7 @@ namespace Ryujinx.Graphics.GAL public readonly bool SupportsQuads; public readonly bool SupportsSeparateSampler; public readonly bool SupportsShaderBallot; + public readonly bool SupportsShaderBallotDivergence; public readonly bool SupportsShaderBarrierDivergence; public readonly bool SupportsShaderFloat64; public readonly bool SupportsTextureGatherOffsets; @@ -106,6 +107,7 @@ namespace Ryujinx.Graphics.GAL bool supportsQuads, bool supportsSeparateSampler, bool supportsShaderBallot, + bool supportsShaderBallotDivergence, bool supportsShaderBarrierDivergence, bool supportsShaderFloat64, bool supportsTextureGatherOffsets, @@ -167,6 +169,7 @@ namespace Ryujinx.Graphics.GAL SupportsQuads = supportsQuads; SupportsSeparateSampler = supportsSeparateSampler; SupportsShaderBallot = 
supportsShaderBallot; + SupportsShaderBallotDivergence = supportsShaderBallotDivergence; SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence; SupportsShaderFloat64 = supportsShaderFloat64; SupportsTextureGatherOffsets = supportsTextureGatherOffsets; diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs index d89eebabf..8ab8fa926 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs @@ -227,6 +227,8 @@ namespace Ryujinx.Graphics.Gpu.Shader public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot; + public bool QueryHostSupportsShaderBallotDivergence() => _context.Capabilities.SupportsShaderBallotDivergence; + public bool QueryHostSupportsShaderBarrierDivergence() => _context.Capabilities.SupportsShaderBarrierDivergence; public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64; diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs index ba9cd45c6..40243d009 100644 --- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs +++ b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs @@ -177,6 +177,7 @@ namespace Ryujinx.Graphics.OpenGL supportsQuads: HwCapabilities.SupportsQuads, supportsSeparateSampler: false, supportsShaderBallot: HwCapabilities.SupportsShaderBallot, + supportsShaderBallotDivergence: true, supportsShaderBarrierDivergence: !(intelWindows || intelUnix), supportsShaderFloat64: true, supportsTextureGatherOffsets: true, diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs index 6206985d8..8da14714a 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs @@ -227,14 +227,44 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv private static OperationResult 
GenerateBallot(CodeGenContext context, AstOperation operation) { var source = operation.GetSource(0); + var predicate = context.Get(AggregateType.Bool, source); - var uvec4Type = context.TypeVector(context.TypeU32(), 4); - var execution = context.Constant(context.TypeU32(), Scope.Subgroup); + if (!context.HostCapabilities.SupportsShaderBallotDivergence && + (context.CurrentBlock.Type != AstBlockType.Main || context.MayHaveReturned || !context.IsMainFunction)) + { + // If divergent ballot is not supported, we can emulate it with a subgroupAdd operation, + // where we add a bit mask with a unique bit set for each subgroup invocation. - var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, context.Get(AggregateType.Bool, source)); - var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index); + var bit = context.Select( + context.TypeU32(), + predicate, + context.Constant(context.TypeU32(), 1), + context.Constant(context.TypeU32(), 0)); - return new OperationResult(AggregateType.U32, mask); + var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId); + var threadIdLow = context.BitwiseAnd(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 0x1f)); + var threadIdHigh = context.ShiftRightLogical(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 5)); + var bitMask = context.ShiftLeftLogical(context.TypeU32(), bit, threadIdLow); + var isGroup = context.IEqual(context.TypeBool(), threadIdHigh, context.Constant(context.TypeU32(), operation.Index)); + bitMask = context.Select(context.TypeU32(), isGroup, bitMask, context.Constant(context.TypeU32(), 0)); + var mask = context.GroupNonUniformIAdd( + context.TypeU32(), + context.Constant(context.TypeU32(), Scope.Subgroup), + GroupOperation.Reduce, + bitMask); + + return new OperationResult(AggregateType.U32, mask); + } + else + { + var uvec4Type = context.TypeVector(context.TypeU32(), 4); + var execution = 
context.Constant(context.TypeU32(), Scope.Subgroup); + + var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, predicate); + var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index); + + return new OperationResult(AggregateType.U32, mask); + } } private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation) diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs index b259dde28..87e87995f 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs @@ -28,7 +28,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv _poolLock = new object(); } - private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd; + private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd | HelperFunctionsMask.Ballot; public static byte[] Generate(StructuredProgramInfo info, CodeGenParameters parameters) { @@ -60,6 +60,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv context.AddCapability(Capability.Float64); } + if (info.HelperFunctionsMask.HasFlag(HelperFunctionsMask.Ballot) && !context.HostCapabilities.SupportsShaderBallotDivergence) + { + // Ballots might be emulated with subgroupAdd in those cases. 
+ context.AddCapability(Capability.GroupNonUniformArithmetic); + } + if (parameters.Definitions.TransformFeedbackEnabled && parameters.Definitions.LastInVertexPipeline) { context.AddCapability(Capability.TransformFeedback); diff --git a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs index 4e6d6edf9..ae19cac9b 100644 --- a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs +++ b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs @@ -319,6 +319,15 @@ namespace Ryujinx.Graphics.Shader return true; } + /// <summary> + /// Queries host GPU shader support for ballot instructions on divergent control flow paths. + /// </summary> + /// <returns>True if the GPU supports ballot instructions on divergent control flow paths, false otherwise</returns> + bool QueryHostSupportsShaderBallotDivergence() + { + return true; + } + /// <summary> /// Queries host GPU shader support for barrier instructions on divergent control flow paths. /// </summary> diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs index 2a3d65e75..f7ecbe4be 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs @@ -9,5 +9,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr MultiplyHighU32 = 1 << 3, SwizzleAdd = 1 << 10, FSI = 1 << 11, + Ballot = 1 << 12, } } diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs index 88053658d..c5b258c82 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs @@ -338,6 +338,9 @@ namespace Ryujinx.Graphics.Shader.StructuredIr case Instruction.FSIEnd: context.Info.HelperFunctionsMask |= HelperFunctionsMask.FSI; break; + case Instruction.Ballot: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.Ballot; + break; } } diff --git
a/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs b/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs index 11fe6599d..c9477aeac 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs @@ -7,6 +7,7 @@ namespace Ryujinx.Graphics.Shader.Translation public readonly bool SupportsFragmentShaderOrderingIntel; public readonly bool SupportsGeometryShaderPassthrough; public readonly bool SupportsShaderBallot; + public readonly bool SupportsShaderBallotDivergence; public readonly bool SupportsShaderBarrierDivergence; public readonly bool SupportsShaderFloat64; public readonly bool SupportsTextureShadowLod; @@ -18,6 +19,7 @@ namespace Ryujinx.Graphics.Shader.Translation bool supportsFragmentShaderOrderingIntel, bool supportsGeometryShaderPassthrough, bool supportsShaderBallot, + bool supportsShaderBallotDivergence, bool supportsShaderBarrierDivergence, bool supportsShaderFloat64, bool supportsTextureShadowLod, @@ -28,6 +30,7 @@ namespace Ryujinx.Graphics.Shader.Translation SupportsFragmentShaderOrderingIntel = supportsFragmentShaderOrderingIntel; SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough; SupportsShaderBallot = supportsShaderBallot; + SupportsShaderBallotDivergence = supportsShaderBallotDivergence; SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence; SupportsShaderFloat64 = supportsShaderFloat64; SupportsTextureShadowLod = supportsTextureShadowLod; diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs index a579433f9..ecd0fd654 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs @@ -362,6 +362,7 @@ namespace Ryujinx.Graphics.Shader.Translation GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel(), GpuAccessor.QueryHostSupportsGeometryShaderPassthrough(), 
GpuAccessor.QueryHostSupportsShaderBallot(), + GpuAccessor.QueryHostSupportsShaderBallotDivergence(), GpuAccessor.QueryHostSupportsShaderBarrierDivergence(), GpuAccessor.QueryHostSupportsShaderFloat64(), GpuAccessor.QueryHostSupportsTextureShadowLod(), diff --git a/src/Ryujinx.Graphics.Vulkan/ShaderCollection.cs b/src/Ryujinx.Graphics.Vulkan/ShaderCollection.cs index 5ddd157df..b529e931d 100644 --- a/src/Ryujinx.Graphics.Vulkan/ShaderCollection.cs +++ b/src/Ryujinx.Graphics.Vulkan/ShaderCollection.cs @@ -1,4 +1,4 @@ -using Ryujinx.Common.Logging; +using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; using Silk.NET.Vulkan; using System; @@ -290,7 +290,7 @@ namespace Ryujinx.Graphics.Vulkan return segments; } - private static ResourceBindingSegment[][] BuildBindingSegments(ReadOnlyCollection setUsages, bool hasBatchedTextureSamplerBug, out bool usesBufferTextures) + private static ResourceBindingSegment[][] BuildBindingSegments(ReadOnlyCollection setUsages, bool hasBatchedTextureBug, out bool usesBufferTextures) { usesBufferTextures = false; @@ -314,7 +314,7 @@ namespace Ryujinx.Graphics.Vulkan if (currentUsage.Binding + currentCount != usage.Binding || currentUsage.Type != usage.Type || - (currentUsage.Type == ResourceType.TextureAndSampler && hasBatchedTextureSamplerBug) || + (IsReadOnlyTexture(currentUsage.Type) && hasBatchedTextureBug) || currentUsage.Stages != usage.Stages || currentUsage.ArrayLength > 1 || usage.ArrayLength > 1) @@ -450,6 +450,12 @@ namespace Ryujinx.Graphics.Vulkan return (buffer, texture); } + private static bool IsReadOnlyTexture(ResourceType resourceType) + { + return resourceType == ResourceType.TextureAndSampler || resourceType == ResourceType.BufferTexture; + + } + private async Task BackgroundCompilation() { await Task.WhenAll(_shaders.Select(shader => shader.CompileTask)); diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs index 4e3f73fca..8678532fd 100644 --- 
a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs +++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs @@ -758,6 +758,7 @@ namespace Ryujinx.Graphics.Vulkan supportsQuads: false, supportsSeparateSampler: true, supportsShaderBallot: false, + supportsShaderBallotDivergence: Vendor != Vendor.Qualcomm, supportsShaderBarrierDivergence: Vendor != Vendor.Intel, supportsShaderFloat64: Capabilities.SupportsShaderFloat64, supportsTextureGatherOffsets: features2.Features.ShaderImageGatherExtended && !IsMoltenVk,