diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs
index dc927eaba..cdd7da8cc 100644
--- a/src/Ryujinx.Graphics.GAL/Capabilities.cs
+++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs
@@ -37,6 +37,7 @@ namespace Ryujinx.Graphics.GAL
public readonly bool SupportsCubemapView;
public readonly bool SupportsNonConstantTextureOffset;
public readonly bool SupportsShaderBallot;
+ public readonly bool SupportsShaderBallotDivergence;
public readonly bool SupportsShaderBarrierDivergence;
public readonly bool SupportsShaderFloat64;
public readonly bool SupportsTextureGatherOffsets;
@@ -93,6 +94,7 @@ namespace Ryujinx.Graphics.GAL
bool supportsCubemapView,
bool supportsNonConstantTextureOffset,
bool supportsShaderBallot,
+ bool supportsShaderBallotDivergence,
bool supportsShaderBarrierDivergence,
bool supportsShaderFloat64,
bool supportsTextureGatherOffsets,
@@ -145,6 +147,7 @@ namespace Ryujinx.Graphics.GAL
SupportsCubemapView = supportsCubemapView;
SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
SupportsShaderBallot = supportsShaderBallot;
+ SupportsShaderBallotDivergence = supportsShaderBallotDivergence;
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
SupportsShaderFloat64 = supportsShaderFloat64;
SupportsTextureGatherOffsets = supportsTextureGatherOffsets;
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
index a5b31363b..fd0c48c40 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
@@ -180,6 +180,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
+ public bool QueryHostSupportsShaderBallotDivergence() => _context.Capabilities.SupportsShaderBallotDivergence; // Forwards the capability flag held in _context.Capabilities.
+
public bool QueryHostSupportsShaderBarrierDivergence() => _context.Capabilities.SupportsShaderBarrierDivergence;
public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64;
diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
index 64ba4e3ee..9390271a4 100644
--- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
+++ b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
@@ -167,6 +167,7 @@ namespace Ryujinx.Graphics.OpenGL
supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
supportsScaledVertexFormats: true,
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
+ supportsShaderBallotDivergence: true,
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
supportsShaderFloat64: true,
supportsTextureGatherOffsets: true,
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
index 601753cb0..34bc91f82 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
@@ -227,14 +227,44 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
private static OperationResult GenerateBallot(CodeGenContext context, AstOperation operation)
{
var source = operation.GetSource(0);
+ var predicate = context.Get(AggregateType.Bool, source); // Ballot predicate, shared by both code paths below.
- var uvec4Type = context.TypeVector(context.TypeU32(), 4);
- var execution = context.Constant(context.TypeU32(), Scope.Subgroup);
+ if (!context.HostCapabilities.SupportsShaderBallotDivergence &&
+ (context.CurrentBlock.Type != AstBlockType.Main || context.MayHaveReturned || !context.IsMainFunction))
+ {
+ // If divergent ballot is not supported, we can emulate it with a subgroupAdd operation (a GroupNonUniformIAdd reduction),
+ // where each subgroup invocation contributes a bit mask with its own unique bit set.
- var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, context.Get(AggregateType.Bool, source));
- var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index);
+ var bit = context.Select(
+ context.TypeU32(),
+ predicate,
+ context.Constant(context.TypeU32(), 1),
+ context.Constant(context.TypeU32(), 0));
- return new OperationResult(AggregateType.U32, mask);
+ var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
+ var threadIdLow = context.BitwiseAnd(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 0x1f)); // Low 5 bits: bit position inside one 32-bit mask word.
+ var threadIdHigh = context.ShiftRightLogical(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 5)); // Remaining bits: index of the 32-bit mask word this lane falls into.
+ var bitMask = context.ShiftLeftLogical(context.TypeU32(), bit, threadIdLow);
+ var isGroup = context.IEqual(context.TypeBool(), threadIdHigh, context.Constant(context.TypeU32(), operation.Index)); // True only for lanes belonging to the word requested by operation.Index.
+ bitMask = context.Select(context.TypeU32(), isGroup, bitMask, context.Constant(context.TypeU32(), 0)); // Lanes outside the requested word contribute zero.
+ var mask = context.GroupNonUniformIAdd(
+ context.TypeU32(),
+ context.Constant(context.TypeU32(), Scope.Subgroup),
+ GroupOperation.Reduce,
+ bitMask); // Contributions occupy distinct bits (see the note above), so the sum acts as a bitwise OR.
+
+ return new OperationResult(AggregateType.U32, mask);
+ }
+ else
+ {
+ var uvec4Type = context.TypeVector(context.TypeU32(), 4);
+ var execution = context.Constant(context.TypeU32(), Scope.Subgroup);
+
+ var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, predicate);
+ var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index); // Pick the 32-bit word selected by operation.Index out of the 4-component ballot result.
+
+ return new OperationResult(AggregateType.U32, mask);
+ }
}
private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs
index ccfdc46d0..5438119ed 100644
--- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs
@@ -28,7 +28,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
_poolLock = new object();
}
- private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd;
+ private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd | HelperFunctionsMask.Ballot; // Ballot is listed because its emulated (subgroup add) path reads SubgroupLaneId. NOTE(review): presumably this mask gates declaring that input; it is set even when the host supports divergent ballots - confirm at the use site.
public static byte[] Generate(StructuredProgramInfo info, CodeGenParameters parameters)
{
@@ -51,6 +51,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
context.AddCapability(Capability.ImageQuery);
context.AddCapability(Capability.SampledBuffer);
+ if (info.HelperFunctionsMask.HasFlag(HelperFunctionsMask.Ballot) && !context.HostCapabilities.SupportsShaderBallotDivergence)
+ {
+ // Ballots might be emulated with subgroupAdd in those cases.
+ context.AddCapability(Capability.GroupNonUniformArithmetic);
+ }
+
if (parameters.Definitions.TransformFeedbackEnabled && parameters.Definitions.LastInVertexPipeline)
{
context.AddCapability(Capability.TransformFeedback);
diff --git a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
index df6d29dc5..22f0ba611 100644
--- a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
+++ b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
@@ -312,6 +312,15 @@ namespace Ryujinx.Graphics.Shader
return true;
}
+ ///
+ /// Queries host GPU shader support for ballot instructions on divergent control flow paths.
+ ///
+ /// True if the GPU supports ballot instructions on divergent control flow paths, false otherwise
+ bool QueryHostSupportsShaderBallotDivergence()
+ {
+ return true;
+ }
+
/// <summary>
/// Queries host GPU shader support for barrier instructions on divergent control flow paths.
/// </summary>
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
index 2a3d65e75..f7ecbe4be 100644
--- a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
@@ -9,5 +9,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
MultiplyHighU32 = 1 << 3,
SwizzleAdd = 1 << 10,
FSI = 1 << 11,
+ Ballot = 1 << 12,
}
}
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
index 2e2df7546..70c343592 100644
--- a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
@@ -328,6 +328,9 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
case Instruction.FSIEnd:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.FSI;
break;
+ case Instruction.Ballot:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.Ballot;
+ break;
}
}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs b/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs
index 2523272b0..431200033 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs
@@ -7,6 +7,7 @@ namespace Ryujinx.Graphics.Shader.Translation
public readonly bool SupportsFragmentShaderOrderingIntel;
public readonly bool SupportsGeometryShaderPassthrough;
public readonly bool SupportsShaderBallot;
+ public readonly bool SupportsShaderBallotDivergence;
public readonly bool SupportsShaderBarrierDivergence;
public readonly bool SupportsTextureShadowLod;
public readonly bool SupportsViewportMask;
@@ -17,6 +18,7 @@ namespace Ryujinx.Graphics.Shader.Translation
bool supportsFragmentShaderOrderingIntel,
bool supportsGeometryShaderPassthrough,
bool supportsShaderBallot,
+ bool supportsShaderBallotDivergence,
bool supportsShaderBarrierDivergence,
bool supportsTextureShadowLod,
bool supportsViewportMask)
@@ -26,6 +28,7 @@ namespace Ryujinx.Graphics.Shader.Translation
SupportsFragmentShaderOrderingIntel = supportsFragmentShaderOrderingIntel;
SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
SupportsShaderBallot = supportsShaderBallot;
+ SupportsShaderBallotDivergence = supportsShaderBallotDivergence;
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
SupportsTextureShadowLod = supportsTextureShadowLod;
SupportsViewportMask = supportsViewportMask;
diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
index a193ab3c4..c55cd11f9 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
@@ -361,6 +361,7 @@ namespace Ryujinx.Graphics.Shader.Translation
GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel(),
GpuAccessor.QueryHostSupportsGeometryShaderPassthrough(),
GpuAccessor.QueryHostSupportsShaderBallot(),
+ GpuAccessor.QueryHostSupportsShaderBallotDivergence(),
GpuAccessor.QueryHostSupportsShaderBarrierDivergence(),
GpuAccessor.QueryHostSupportsTextureShadowLod(),
GpuAccessor.QueryHostSupportsViewportMask());
diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
index 1db104f83..674335c2e 100644
--- a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
+++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
@@ -616,6 +616,7 @@ namespace Ryujinx.Graphics.Vulkan
supportsNonConstantTextureOffset: false,
supportsScaledVertexFormats: FormatCapabilities.SupportsScaledVertexFormats(),
supportsShaderBallot: false,
+ supportsShaderBallotDivergence: Vendor != Vendor.Qualcomm,
supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
supportsTextureGatherOffsets: features2.Features.ShaderImageGatherExtended && !IsMoltenVk,