diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs index a5c6eb5c8..2334bf0ca 100644 --- a/src/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs @@ -40,6 +40,7 @@ namespace Ryujinx.Graphics.GAL public readonly bool SupportsQuads; public readonly bool SupportsSeparateSampler; public readonly bool SupportsShaderBallot; + public readonly bool SupportsShaderBallotDivergence; public readonly bool SupportsShaderBarrierDivergence; public readonly bool SupportsShaderFloat64; public readonly bool SupportsTextureGatherOffsets; @@ -106,6 +107,7 @@ namespace Ryujinx.Graphics.GAL bool supportsQuads, bool supportsSeparateSampler, bool supportsShaderBallot, + bool supportsShaderBallotDivergence, bool supportsShaderBarrierDivergence, bool supportsShaderFloat64, bool supportsTextureGatherOffsets, @@ -167,6 +169,7 @@ namespace Ryujinx.Graphics.GAL SupportsQuads = supportsQuads; SupportsSeparateSampler = supportsSeparateSampler; SupportsShaderBallot = supportsShaderBallot; + SupportsShaderBallotDivergence = supportsShaderBallotDivergence; SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence; SupportsShaderFloat64 = supportsShaderFloat64; SupportsTextureGatherOffsets = supportsTextureGatherOffsets; diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs index d89eebabf..8ab8fa926 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs @@ -227,6 +227,8 @@ namespace Ryujinx.Graphics.Gpu.Shader public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot; + public bool QueryHostSupportsShaderBallotDivergence() => _context.Capabilities.SupportsShaderBallotDivergence; + public bool QueryHostSupportsShaderBarrierDivergence() => _context.Capabilities.SupportsShaderBarrierDivergence; public bool QueryHostSupportsShaderFloat64() => 
_context.Capabilities.SupportsShaderFloat64; diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs index ba9cd45c6..40243d009 100644 --- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs +++ b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs @@ -177,6 +177,7 @@ namespace Ryujinx.Graphics.OpenGL supportsQuads: HwCapabilities.SupportsQuads, supportsSeparateSampler: false, supportsShaderBallot: HwCapabilities.SupportsShaderBallot, + supportsShaderBallotDivergence: true, supportsShaderBarrierDivergence: !(intelWindows || intelUnix), supportsShaderFloat64: true, supportsTextureGatherOffsets: true, diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs index 6206985d8..8da14714a 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs @@ -227,14 +227,44 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv private static OperationResult GenerateBallot(CodeGenContext context, AstOperation operation) { var source = operation.GetSource(0); + var predicate = context.Get(AggregateType.Bool, source); - var uvec4Type = context.TypeVector(context.TypeU32(), 4); - var execution = context.Constant(context.TypeU32(), Scope.Subgroup); + if (!context.HostCapabilities.SupportsShaderBallotDivergence && + (context.CurrentBlock.Type != AstBlockType.Main || context.MayHaveReturned || !context.IsMainFunction)) + { + // If divergent ballot is not supported, we can emulate it with a subgroupAdd operation, + // where we add a bit mask with a unique bit set for each subgroup invocation. 
- var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, context.Get(AggregateType.Bool, source)); - var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index); + var bit = context.Select( + context.TypeU32(), + predicate, + context.Constant(context.TypeU32(), 1), + context.Constant(context.TypeU32(), 0)); - return new OperationResult(AggregateType.U32, mask); + var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId); + var threadIdLow = context.BitwiseAnd(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 0x1f)); + var threadIdHigh = context.ShiftRightLogical(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 5)); + var bitMask = context.ShiftLeftLogical(context.TypeU32(), bit, threadIdLow); + var isGroup = context.IEqual(context.TypeBool(), threadIdHigh, context.Constant(context.TypeU32(), operation.Index)); + bitMask = context.Select(context.TypeU32(), isGroup, bitMask, context.Constant(context.TypeU32(), 0)); + var mask = context.GroupNonUniformIAdd( + context.TypeU32(), + context.Constant(context.TypeU32(), Scope.Subgroup), + GroupOperation.Reduce, + bitMask); + + return new OperationResult(AggregateType.U32, mask); + } + else + { + var uvec4Type = context.TypeVector(context.TypeU32(), 4); + var execution = context.Constant(context.TypeU32(), Scope.Subgroup); + + var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, predicate); + var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index); + + return new OperationResult(AggregateType.U32, mask); + } } private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation) diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs index b259dde28..87e87995f 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs +++ 
b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs @@ -28,7 +28,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv _poolLock = new object(); } - private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd; + private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd | HelperFunctionsMask.Ballot; public static byte[] Generate(StructuredProgramInfo info, CodeGenParameters parameters) { @@ -60,6 +60,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv context.AddCapability(Capability.Float64); } + if (info.HelperFunctionsMask.HasFlag(HelperFunctionsMask.Ballot) && !context.HostCapabilities.SupportsShaderBallotDivergence) + { + // Ballots might be emulated with subgroupAdd in those cases. + context.AddCapability(Capability.GroupNonUniformArithmetic); + } + if (parameters.Definitions.TransformFeedbackEnabled && parameters.Definitions.LastInVertexPipeline) { context.AddCapability(Capability.TransformFeedback); diff --git a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs index 4e6d6edf9..ae19cac9b 100644 --- a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs +++ b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs @@ -319,6 +319,15 @@ namespace Ryujinx.Graphics.Shader return true; } + /// <summary> + /// Queries host GPU shader support for ballot instructions on divergent control flow paths. + /// </summary> + /// <returns>True if the GPU supports ballot instructions on divergent control flow paths, false otherwise</returns> + bool QueryHostSupportsShaderBallotDivergence() + { + return true; + } + /// /// Queries host GPU shader support for barrier instructions on divergent control flow paths.
/// diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs index 2a3d65e75..f7ecbe4be 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs @@ -9,5 +9,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr MultiplyHighU32 = 1 << 3, SwizzleAdd = 1 << 10, FSI = 1 << 11, + Ballot = 1 << 12, } } diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs index 88053658d..c5b258c82 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs @@ -338,6 +338,9 @@ namespace Ryujinx.Graphics.Shader.StructuredIr case Instruction.FSIEnd: context.Info.HelperFunctionsMask |= HelperFunctionsMask.FSI; break; + case Instruction.Ballot: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.Ballot; + break; } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs b/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs index 11fe6599d..c9477aeac 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs @@ -7,6 +7,7 @@ namespace Ryujinx.Graphics.Shader.Translation public readonly bool SupportsFragmentShaderOrderingIntel; public readonly bool SupportsGeometryShaderPassthrough; public readonly bool SupportsShaderBallot; + public readonly bool SupportsShaderBallotDivergence; public readonly bool SupportsShaderBarrierDivergence; public readonly bool SupportsShaderFloat64; public readonly bool SupportsTextureShadowLod; @@ -18,6 +19,7 @@ namespace Ryujinx.Graphics.Shader.Translation bool supportsFragmentShaderOrderingIntel, bool supportsGeometryShaderPassthrough, bool supportsShaderBallot, + bool supportsShaderBallotDivergence, bool 
supportsShaderBarrierDivergence, bool supportsShaderFloat64, bool supportsTextureShadowLod, @@ -28,6 +30,7 @@ namespace Ryujinx.Graphics.Shader.Translation SupportsFragmentShaderOrderingIntel = supportsFragmentShaderOrderingIntel; SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough; SupportsShaderBallot = supportsShaderBallot; + SupportsShaderBallotDivergence = supportsShaderBallotDivergence; SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence; SupportsShaderFloat64 = supportsShaderFloat64; SupportsTextureShadowLod = supportsTextureShadowLod; diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs index a579433f9..ecd0fd654 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs @@ -362,6 +362,7 @@ namespace Ryujinx.Graphics.Shader.Translation GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel(), GpuAccessor.QueryHostSupportsGeometryShaderPassthrough(), GpuAccessor.QueryHostSupportsShaderBallot(), + GpuAccessor.QueryHostSupportsShaderBallotDivergence(), GpuAccessor.QueryHostSupportsShaderBarrierDivergence(), GpuAccessor.QueryHostSupportsShaderFloat64(), GpuAccessor.QueryHostSupportsTextureShadowLod(), diff --git a/src/Ryujinx.Graphics.Vulkan/ShaderCollection.cs b/src/Ryujinx.Graphics.Vulkan/ShaderCollection.cs index 5ddd157df..b529e931d 100644 --- a/src/Ryujinx.Graphics.Vulkan/ShaderCollection.cs +++ b/src/Ryujinx.Graphics.Vulkan/ShaderCollection.cs @@ -1,4 +1,4 @@ -using Ryujinx.Common.Logging; +using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; using Silk.NET.Vulkan; using System; @@ -290,7 +290,7 @@ namespace Ryujinx.Graphics.Vulkan return segments; } - private static ResourceBindingSegment[][] BuildBindingSegments(ReadOnlyCollection setUsages, bool hasBatchedTextureSamplerBug, out bool usesBufferTextures) + private static ResourceBindingSegment[][] 
BuildBindingSegments(ReadOnlyCollection setUsages, bool hasBatchedTextureBug, out bool usesBufferTextures) { usesBufferTextures = false; @@ -314,7 +314,7 @@ namespace Ryujinx.Graphics.Vulkan if (currentUsage.Binding + currentCount != usage.Binding || currentUsage.Type != usage.Type || - (currentUsage.Type == ResourceType.TextureAndSampler && hasBatchedTextureSamplerBug) || + (IsReadOnlyTexture(currentUsage.Type) && hasBatchedTextureBug) || currentUsage.Stages != usage.Stages || currentUsage.ArrayLength > 1 || usage.ArrayLength > 1) @@ -450,6 +450,12 @@ namespace Ryujinx.Graphics.Vulkan return (buffer, texture); } + private static bool IsReadOnlyTexture(ResourceType resourceType) + { + return resourceType == ResourceType.TextureAndSampler || resourceType == ResourceType.BufferTexture; + + } + private async Task BackgroundCompilation() { await Task.WhenAll(_shaders.Select(shader => shader.CompileTask)); diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs index 4e3f73fca..8678532fd 100644 --- a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs +++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs @@ -758,6 +758,7 @@ namespace Ryujinx.Graphics.Vulkan supportsQuads: false, supportsSeparateSampler: true, supportsShaderBallot: false, + supportsShaderBallotDivergence: Vendor != Vendor.Qualcomm, supportsShaderBarrierDivergence: Vendor != Vendor.Intel, supportsShaderFloat64: Capabilities.SupportsShaderFloat64, supportsTextureGatherOffsets: features2.Features.ShaderImageGatherExtended && !IsMoltenVk,