diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs
index a5c6eb5c8..2334bf0ca 100644
--- a/src/Ryujinx.Graphics.GAL/Capabilities.cs
+++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs
@@ -40,6 +40,7 @@ namespace Ryujinx.Graphics.GAL
public readonly bool SupportsQuads;
public readonly bool SupportsSeparateSampler;
public readonly bool SupportsShaderBallot;
+ public readonly bool SupportsShaderBallotDivergence;
public readonly bool SupportsShaderBarrierDivergence;
public readonly bool SupportsShaderFloat64;
public readonly bool SupportsTextureGatherOffsets;
@@ -106,6 +107,7 @@ namespace Ryujinx.Graphics.GAL
bool supportsQuads,
bool supportsSeparateSampler,
bool supportsShaderBallot,
+ bool supportsShaderBallotDivergence,
bool supportsShaderBarrierDivergence,
bool supportsShaderFloat64,
bool supportsTextureGatherOffsets,
@@ -167,6 +169,7 @@ namespace Ryujinx.Graphics.GAL
SupportsQuads = supportsQuads;
SupportsSeparateSampler = supportsSeparateSampler;
SupportsShaderBallot = supportsShaderBallot;
+ SupportsShaderBallotDivergence = supportsShaderBallotDivergence;
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
SupportsShaderFloat64 = supportsShaderFloat64;
SupportsTextureGatherOffsets = supportsTextureGatherOffsets;
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
index d89eebabf..8ab8fa926 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
@@ -227,6 +227,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
+ public bool QueryHostSupportsShaderBallotDivergence() => _context.Capabilities.SupportsShaderBallotDivergence;
+
public bool QueryHostSupportsShaderBarrierDivergence() => _context.Capabilities.SupportsShaderBarrierDivergence;
public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64;
diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
index ba9cd45c6..40243d009 100644
--- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
+++ b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
@@ -177,6 +177,7 @@ namespace Ryujinx.Graphics.OpenGL
supportsQuads: HwCapabilities.SupportsQuads,
supportsSeparateSampler: false,
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
+ supportsShaderBallotDivergence: true,
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
supportsShaderFloat64: true,
supportsTextureGatherOffsets: true,
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
index 6206985d8..8da14714a 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
@@ -227,14 +227,44 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
private static OperationResult GenerateBallot(CodeGenContext context, AstOperation operation)
{
var source = operation.GetSource(0);
+ var predicate = context.Get(AggregateType.Bool, source);
- var uvec4Type = context.TypeVector(context.TypeU32(), 4);
- var execution = context.Constant(context.TypeU32(), Scope.Subgroup);
+ if (!context.HostCapabilities.SupportsShaderBallotDivergence &&
+ (context.CurrentBlock.Type != AstBlockType.Main || context.MayHaveReturned || !context.IsMainFunction))
+ {
+ // If divergent ballot is not supported, emulate it with a subgroupAdd reduction: every invocation contributes
+ // a mask holding its own unique bit, so no carries can occur and the sum equals the bitwise OR of all masks.
- var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, context.Get(AggregateType.Bool, source));
- var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index);
+ var bit = context.Select(
+ context.TypeU32(),
+ predicate,
+ context.Constant(context.TypeU32(), 1),
+ context.Constant(context.TypeU32(), 0));
- return new OperationResult(AggregateType.U32, mask);
+ var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
+ var threadIdLow = context.BitwiseAnd(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 0x1f)); // Bit position inside the 32-bit word (lane & 31).
+ var threadIdHigh = context.ShiftRightLogical(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 5)); // Index of the 32-bit word the lane falls in (lane >> 5).
+ var bitMask = context.ShiftLeftLogical(context.TypeU32(), bit, threadIdLow);
+ var isGroup = context.IEqual(context.TypeBool(), threadIdHigh, context.Constant(context.TypeU32(), operation.Index));
+ bitMask = context.Select(context.TypeU32(), isGroup, bitMask, context.Constant(context.TypeU32(), 0)); // Lanes outside the requested word (operation.Index) contribute zero.
+ var mask = context.GroupNonUniformIAdd(
+ context.TypeU32(),
+ context.Constant(context.TypeU32(), Scope.Subgroup),
+ GroupOperation.Reduce,
+ bitMask);
+
+ return new OperationResult(AggregateType.U32, mask);
+ }
+ else
+ {
+ var uvec4Type = context.TypeVector(context.TypeU32(), 4);
+ var execution = context.Constant(context.TypeU32(), Scope.Subgroup);
+
+ var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, predicate);
+ var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index); // Pick the 32-bit component selected by operation.Index.
+
+ return new OperationResult(AggregateType.U32, mask);
+ }
}
private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs
index b259dde28..87e87995f 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs
@@ -28,7 +28,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
_poolLock = new object();
}
- private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd;
+ private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd | HelperFunctionsMask.Ballot;
public static byte[] Generate(StructuredProgramInfo info, CodeGenParameters parameters)
{
@@ -60,6 +60,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
context.AddCapability(Capability.Float64);
}
+ if (info.HelperFunctionsMask.HasFlag(HelperFunctionsMask.Ballot) && !context.HostCapabilities.SupportsShaderBallotDivergence)
+ {
+ // Ballots might be emulated with subgroupAdd in those cases.
+ context.AddCapability(Capability.GroupNonUniformArithmetic);
+ }
+
if (parameters.Definitions.TransformFeedbackEnabled && parameters.Definitions.LastInVertexPipeline)
{
context.AddCapability(Capability.TransformFeedback);
diff --git a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
index 4e6d6edf9..ae19cac9b 100644
--- a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
+++ b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
@@ -319,6 +319,15 @@ namespace Ryujinx.Graphics.Shader
return true;
}
+ /// <summary>
+ /// Queries host GPU shader support for ballot instructions on divergent control flow paths.
+ /// </summary>
+ /// <returns>True if the GPU supports ballot instructions on divergent control flow paths, false otherwise</returns>
+ bool QueryHostSupportsShaderBallotDivergence()
+ {
+ return true; // Default implementation reports support.
+ }
+
///
/// Queries host GPU shader support for barrier instructions on divergent control flow paths.
///
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
index 2a3d65e75..f7ecbe4be 100644
--- a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
@@ -9,5 +9,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
MultiplyHighU32 = 1 << 3,
SwizzleAdd = 1 << 10,
FSI = 1 << 11,
+ Ballot = 1 << 12,
}
}
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
index 88053658d..c5b258c82 100644
--- a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
@@ -338,6 +338,9 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
case Instruction.FSIEnd:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.FSI;
break;
+ case Instruction.Ballot:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.Ballot;
+ break;
}
}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs b/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs
index 11fe6599d..c9477aeac 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs
@@ -7,6 +7,7 @@ namespace Ryujinx.Graphics.Shader.Translation
public readonly bool SupportsFragmentShaderOrderingIntel;
public readonly bool SupportsGeometryShaderPassthrough;
public readonly bool SupportsShaderBallot;
+ public readonly bool SupportsShaderBallotDivergence;
public readonly bool SupportsShaderBarrierDivergence;
public readonly bool SupportsShaderFloat64;
public readonly bool SupportsTextureShadowLod;
@@ -18,6 +19,7 @@ namespace Ryujinx.Graphics.Shader.Translation
bool supportsFragmentShaderOrderingIntel,
bool supportsGeometryShaderPassthrough,
bool supportsShaderBallot,
+ bool supportsShaderBallotDivergence,
bool supportsShaderBarrierDivergence,
bool supportsShaderFloat64,
bool supportsTextureShadowLod,
@@ -28,6 +30,7 @@ namespace Ryujinx.Graphics.Shader.Translation
SupportsFragmentShaderOrderingIntel = supportsFragmentShaderOrderingIntel;
SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
SupportsShaderBallot = supportsShaderBallot;
+ SupportsShaderBallotDivergence = supportsShaderBallotDivergence;
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
SupportsShaderFloat64 = supportsShaderFloat64;
SupportsTextureShadowLod = supportsTextureShadowLod;
diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
index a579433f9..ecd0fd654 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
@@ -362,6 +362,7 @@ namespace Ryujinx.Graphics.Shader.Translation
GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel(),
GpuAccessor.QueryHostSupportsGeometryShaderPassthrough(),
GpuAccessor.QueryHostSupportsShaderBallot(),
+ GpuAccessor.QueryHostSupportsShaderBallotDivergence(),
GpuAccessor.QueryHostSupportsShaderBarrierDivergence(),
GpuAccessor.QueryHostSupportsShaderFloat64(),
GpuAccessor.QueryHostSupportsTextureShadowLod(),
diff --git a/src/Ryujinx.Graphics.Vulkan/ShaderCollection.cs b/src/Ryujinx.Graphics.Vulkan/ShaderCollection.cs
index 5ddd157df..b529e931d 100644
--- a/src/Ryujinx.Graphics.Vulkan/ShaderCollection.cs
+++ b/src/Ryujinx.Graphics.Vulkan/ShaderCollection.cs
@@ -1,4 +1,4 @@
-using Ryujinx.Common.Logging;
+using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Silk.NET.Vulkan;
using System;
@@ -290,7 +290,7 @@ namespace Ryujinx.Graphics.Vulkan
return segments;
}
- private static ResourceBindingSegment[][] BuildBindingSegments(ReadOnlyCollection setUsages, bool hasBatchedTextureSamplerBug, out bool usesBufferTextures)
+ private static ResourceBindingSegment[][] BuildBindingSegments(ReadOnlyCollection setUsages, bool hasBatchedTextureBug, out bool usesBufferTextures)
{
usesBufferTextures = false;
@@ -314,7 +314,7 @@ namespace Ryujinx.Graphics.Vulkan
if (currentUsage.Binding + currentCount != usage.Binding ||
currentUsage.Type != usage.Type ||
- (currentUsage.Type == ResourceType.TextureAndSampler && hasBatchedTextureSamplerBug) ||
+ (IsReadOnlyTexture(currentUsage.Type) && hasBatchedTextureBug) ||
currentUsage.Stages != usage.Stages ||
currentUsage.ArrayLength > 1 ||
usage.ArrayLength > 1)
@@ -450,6 +450,12 @@ namespace Ryujinx.Graphics.Vulkan
return (buffer, texture);
}
+ /// <summary>Checks if the resource type is either TextureAndSampler or BufferTexture.</summary>
+ private static bool IsReadOnlyTexture(ResourceType resourceType)
+ {
+ return resourceType == ResourceType.TextureAndSampler || resourceType == ResourceType.BufferTexture;
+ }
+
private async Task BackgroundCompilation()
{
await Task.WhenAll(_shaders.Select(shader => shader.CompileTask));
diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
index 4e3f73fca..8678532fd 100644
--- a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
+++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
@@ -758,6 +758,7 @@ namespace Ryujinx.Graphics.Vulkan
supportsQuads: false,
supportsSeparateSampler: true,
supportsShaderBallot: false,
+ supportsShaderBallotDivergence: Vendor != Vendor.Qualcomm,
supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
supportsTextureGatherOffsets: features2.Features.ShaderImageGatherExtended && !IsMoltenVk,