Support ballot operations with divergent control flow on Adreno

This commit is contained in:
Gabriel A 2023-08-21 14:01:45 -03:00 committed by Emmanuel Hansen
parent dca4091930
commit 698ac0413b
11 changed files with 66 additions and 6 deletions

View File

@ -37,6 +37,7 @@ namespace Ryujinx.Graphics.GAL
public readonly bool SupportsCubemapView;
public readonly bool SupportsNonConstantTextureOffset;
public readonly bool SupportsShaderBallot;
public readonly bool SupportsShaderBallotDivergence;
public readonly bool SupportsShaderBarrierDivergence;
public readonly bool SupportsShaderFloat64;
public readonly bool SupportsTextureGatherOffsets;
@ -93,6 +94,7 @@ namespace Ryujinx.Graphics.GAL
bool supportsCubemapView,
bool supportsNonConstantTextureOffset,
bool supportsShaderBallot,
bool supportsShaderBallotDivergence,
bool supportsShaderBarrierDivergence,
bool supportsShaderFloat64,
bool supportsTextureGatherOffsets,
@ -145,6 +147,7 @@ namespace Ryujinx.Graphics.GAL
SupportsCubemapView = supportsCubemapView;
SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
SupportsShaderBallot = supportsShaderBallot;
SupportsShaderBallotDivergence = supportsShaderBallotDivergence;
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
SupportsShaderFloat64 = supportsShaderFloat64;
SupportsTextureGatherOffsets = supportsTextureGatherOffsets;

View File

@ -180,6 +180,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <summary>
/// Reports the shader ballot support flag from the capabilities of the current context.
/// </summary>
/// <returns>The value of <c>SupportsShaderBallot</c> on the context capabilities</returns>
public bool QueryHostSupportsShaderBallot()
{
    return _context.Capabilities.SupportsShaderBallot;
}
/// <summary>
/// Reports the flag for ballot support on divergent control flow from the capabilities of the current context.
/// </summary>
/// <returns>The value of <c>SupportsShaderBallotDivergence</c> on the context capabilities</returns>
public bool QueryHostSupportsShaderBallotDivergence()
{
    return _context.Capabilities.SupportsShaderBallotDivergence;
}
/// <summary>
/// Reports the flag for barrier support on divergent control flow from the capabilities of the current context.
/// </summary>
/// <returns>The value of <c>SupportsShaderBarrierDivergence</c> on the context capabilities</returns>
public bool QueryHostSupportsShaderBarrierDivergence()
{
    return _context.Capabilities.SupportsShaderBarrierDivergence;
}
/// <summary>
/// Reports the shader 64-bit float support flag from the capabilities of the current context.
/// </summary>
/// <returns>The value of <c>SupportsShaderFloat64</c> on the context capabilities</returns>
public bool QueryHostSupportsShaderFloat64()
{
    return _context.Capabilities.SupportsShaderFloat64;
}

View File

@ -167,6 +167,7 @@ namespace Ryujinx.Graphics.OpenGL
supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
supportsScaledVertexFormats: true,
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
supportsShaderBallotDivergence: true,
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
supportsShaderFloat64: true,
supportsTextureGatherOffsets: true,

View File

@ -227,14 +227,44 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
/// <summary>
/// Generates the SPIR-V code for a ballot operation, yielding one 32-bit word of the subgroup vote mask.
/// </summary>
/// <remarks>
/// When the host does not support ballots on divergent control flow, and the operation is not placed
/// directly on the main block of the main function (or a return may already have been executed),
/// the ballot is emulated with a subgroup integer add reduction instead of a native ballot instruction.
/// </remarks>
/// <param name="context">Code generation context</param>
/// <param name="operation">Ballot operation; source 0 is the predicate, Index selects the 32-bit mask word</param>
/// <returns>The selected word of the ballot mask, as an unsigned 32-bit value</returns>
private static OperationResult GenerateBallot(CodeGenContext context, AstOperation operation)
{
    var source = operation.GetSource(0);
    var predicate = context.Get(AggregateType.Bool, source);

    if (!context.HostCapabilities.SupportsShaderBallotDivergence &&
        (context.CurrentBlock.Type != AstBlockType.Main || context.MayHaveReturned || !context.IsMainFunction))
    {
        // If divergent ballot is not supported, we can emulate it with a subgroupAdd operation,
        // where we add a bit mask with a unique bit set for each subgroup invocation.

        // 1 if this invocation votes true, 0 otherwise.
        var bit = context.Select(
            context.TypeU32(),
            predicate,
            context.Constant(context.TypeU32(), 1),
            context.Constant(context.TypeU32(), 0));

        // Split the lane id into the bit position inside a 32-bit word (low 5 bits)
        // and the index of the word that bit belongs to (remaining high bits).
        var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
        var threadIdLow = context.BitwiseAnd(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 0x1f));
        var threadIdHigh = context.ShiftRightLogical(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 5));

        var bitMask = context.ShiftLeftLogical(context.TypeU32(), bit, threadIdLow);

        // Only invocations whose bit lives in the requested word contribute to the sum.
        var isGroup = context.IEqual(context.TypeBool(), threadIdHigh, context.Constant(context.TypeU32(), operation.Index));
        bitMask = context.Select(context.TypeU32(), isGroup, bitMask, context.Constant(context.TypeU32(), 0));

        // Every contributing invocation sets a distinct bit, so the sum is the same as a bitwise OR.
        var mask = context.GroupNonUniformIAdd(
            context.TypeU32(),
            context.Constant(context.TypeU32(), Scope.Subgroup),
            GroupOperation.Reduce,
            bitMask);

        return new OperationResult(AggregateType.U32, mask);
    }
    else
    {
        var uvec4Type = context.TypeVector(context.TypeU32(), 4);
        var execution = context.Constant(context.TypeU32(), Scope.Subgroup);

        // Native ballot returns a 4 x 32-bit mask, extract the word selected by the operation index.
        var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, predicate);
        var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index);

        return new OperationResult(AggregateType.U32, mask);
    }
}
private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)

View File

@ -28,7 +28,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
_poolLock = new object();
}
// Set of helper function flags tested through this mask: SwizzleAdd and Ballot.
// NOTE(review): presumably marks shaders that need the invocation/lane id input declared,
// since the emulated ballot path reads the subgroup lane id - confirm at the usage site.
private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd | HelperFunctionsMask.Ballot;
public static byte[] Generate(StructuredProgramInfo info, CodeGenParameters parameters)
{
@ -51,6 +51,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
context.AddCapability(Capability.ImageQuery);
context.AddCapability(Capability.SampledBuffer);
if (info.HelperFunctionsMask.HasFlag(HelperFunctionsMask.Ballot) && !context.HostCapabilities.SupportsShaderBallotDivergence)
{
// Ballots might be emulated with subgroupAdd in those cases.
context.AddCapability(Capability.GroupNonUniformArithmetic);
}
if (parameters.Definitions.TransformFeedbackEnabled && parameters.Definitions.LastInVertexPipeline)
{
context.AddCapability(Capability.TransformFeedback);

View File

@ -312,6 +312,15 @@ namespace Ryujinx.Graphics.Shader
return true;
}
/// <summary>
/// Asks whether the host GPU can execute shader ballot instructions from divergent control flow paths.
/// </summary>
/// <returns>True when ballot on divergent control flow is supported by the host GPU, false when it is not</returns>
bool QueryHostSupportsShaderBallotDivergence() => true;
/// <summary>
/// Queries host GPU shader support for barrier instructions on divergent control flow paths.
/// </summary>

View File

@ -9,5 +9,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
MultiplyHighU32 = 1 << 3,
SwizzleAdd = 1 << 10,
FSI = 1 << 11,
Ballot = 1 << 12,
}
}

View File

@ -328,6 +328,9 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
case Instruction.FSIEnd:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.FSI;
break;
case Instruction.Ballot:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.Ballot;
break;
}
}

View File

@ -7,6 +7,7 @@ namespace Ryujinx.Graphics.Shader.Translation
public readonly bool SupportsFragmentShaderOrderingIntel;
public readonly bool SupportsGeometryShaderPassthrough;
public readonly bool SupportsShaderBallot;
public readonly bool SupportsShaderBallotDivergence;
public readonly bool SupportsShaderBarrierDivergence;
public readonly bool SupportsTextureShadowLod;
public readonly bool SupportsViewportMask;
@ -17,6 +18,7 @@ namespace Ryujinx.Graphics.Shader.Translation
bool supportsFragmentShaderOrderingIntel,
bool supportsGeometryShaderPassthrough,
bool supportsShaderBallot,
bool supportsShaderBallotDivergence,
bool supportsShaderBarrierDivergence,
bool supportsTextureShadowLod,
bool supportsViewportMask)
@ -26,6 +28,7 @@ namespace Ryujinx.Graphics.Shader.Translation
SupportsFragmentShaderOrderingIntel = supportsFragmentShaderOrderingIntel;
SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
SupportsShaderBallot = supportsShaderBallot;
SupportsShaderBallotDivergence = supportsShaderBallotDivergence;
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
SupportsTextureShadowLod = supportsTextureShadowLod;
SupportsViewportMask = supportsViewportMask;

View File

@ -361,6 +361,7 @@ namespace Ryujinx.Graphics.Shader.Translation
GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel(),
GpuAccessor.QueryHostSupportsGeometryShaderPassthrough(),
GpuAccessor.QueryHostSupportsShaderBallot(),
GpuAccessor.QueryHostSupportsShaderBallotDivergence(),
GpuAccessor.QueryHostSupportsShaderBarrierDivergence(),
GpuAccessor.QueryHostSupportsTextureShadowLod(),
GpuAccessor.QueryHostSupportsViewportMask());

View File

@ -616,6 +616,7 @@ namespace Ryujinx.Graphics.Vulkan
supportsNonConstantTextureOffset: false,
supportsScaledVertexFormats: FormatCapabilities.SupportsScaledVertexFormats(),
supportsShaderBallot: false,
supportsShaderBallotDivergence: Vendor != Vendor.Qualcomm,
supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
supportsTextureGatherOffsets: features2.Features.ShaderImageGatherExtended && !IsMoltenVk,