forked from MeloNX/MeloNX
Support ballot operations with divergent control flow on Adreno
This commit is contained in:
parent
24ecab0baa
commit
a2716f715b
@ -37,6 +37,7 @@ namespace Ryujinx.Graphics.GAL
|
||||
public readonly bool SupportsCubemapView;
|
||||
public readonly bool SupportsNonConstantTextureOffset;
|
||||
public readonly bool SupportsShaderBallot;
|
||||
public readonly bool SupportsShaderBallotDivergence;
|
||||
public readonly bool SupportsShaderBarrierDivergence;
|
||||
public readonly bool SupportsShaderFloat64;
|
||||
public readonly bool SupportsTextureGatherOffsets;
|
||||
@ -93,6 +94,7 @@ namespace Ryujinx.Graphics.GAL
|
||||
bool supportsCubemapView,
|
||||
bool supportsNonConstantTextureOffset,
|
||||
bool supportsShaderBallot,
|
||||
bool supportsShaderBallotDivergence,
|
||||
bool supportsShaderBarrierDivergence,
|
||||
bool supportsShaderFloat64,
|
||||
bool supportsTextureGatherOffsets,
|
||||
@ -145,6 +147,7 @@ namespace Ryujinx.Graphics.GAL
|
||||
SupportsCubemapView = supportsCubemapView;
|
||||
SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
|
||||
SupportsShaderBallot = supportsShaderBallot;
|
||||
SupportsShaderBallotDivergence = supportsShaderBallotDivergence;
|
||||
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
|
||||
SupportsShaderFloat64 = supportsShaderFloat64;
|
||||
SupportsTextureGatherOffsets = supportsTextureGatherOffsets;
|
||||
|
@ -180,6 +180,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
|
||||
/// <summary>
/// Reports the SupportsShaderBallot flag from the capabilities of the GPU context.
/// </summary>
/// <returns>The value of the host's SupportsShaderBallot capability</returns>
public bool QueryHostSupportsShaderBallot()
{
    return _context.Capabilities.SupportsShaderBallot;
}
|
||||
|
||||
/// <summary>
/// Reports the SupportsShaderBallotDivergence flag from the capabilities of the GPU context.
/// </summary>
/// <returns>The value of the host's SupportsShaderBallotDivergence capability</returns>
public bool QueryHostSupportsShaderBallotDivergence()
{
    return _context.Capabilities.SupportsShaderBallotDivergence;
}
|
||||
|
||||
/// <summary>
/// Reports the SupportsShaderBarrierDivergence flag from the capabilities of the GPU context.
/// </summary>
/// <returns>The value of the host's SupportsShaderBarrierDivergence capability</returns>
public bool QueryHostSupportsShaderBarrierDivergence()
{
    return _context.Capabilities.SupportsShaderBarrierDivergence;
}
|
||||
|
||||
/// <summary>
/// Reports the SupportsShaderFloat64 flag from the capabilities of the GPU context.
/// </summary>
/// <returns>The value of the host's SupportsShaderFloat64 capability</returns>
public bool QueryHostSupportsShaderFloat64()
{
    return _context.Capabilities.SupportsShaderFloat64;
}
|
||||
|
@ -167,6 +167,7 @@ namespace Ryujinx.Graphics.OpenGL
|
||||
supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
|
||||
supportsScaledVertexFormats: true,
|
||||
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
|
||||
supportsShaderBallotDivergence: true,
|
||||
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
|
||||
supportsShaderFloat64: true,
|
||||
supportsTextureGatherOffsets: true,
|
||||
|
@ -227,14 +227,44 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
||||
/// <summary>
/// Emits the SPIR-V code for a ballot operation and returns one 32-bit word of the resulting mask.
/// </summary>
/// <remarks>
/// When the host reports no support for ballots under divergent control flow, and the ballot is not
/// in the main block of the main function (or the shader may already have returned), the ballot is
/// emulated with a subgroup add reduction instead of a native ballot instruction.
/// </remarks>
/// <param name="context">Code generation context used to emit instructions and query host capabilities</param>
/// <param name="operation">Ballot operation; source 0 is the predicate, and Index selects which 32-bit mask word is returned</param>
/// <returns>The selected ballot mask word, typed as U32</returns>
private static OperationResult GenerateBallot(CodeGenContext context, AstOperation operation)
{
    var source = operation.GetSource(0);
    var predicate = context.Get(AggregateType.Bool, source);

    if (!context.HostCapabilities.SupportsShaderBallotDivergence &&
        (context.CurrentBlock.Type != AstBlockType.Main || context.MayHaveReturned || !context.IsMainFunction))
    {
        // If divergent ballot is not supported, we can emulate it with a subgroupAdd operation,
        // where we add a bit mask with a unique bit set for each subgroup invocation.
        // Since every invocation contributes a distinct bit, the sum is equal to the bitwise OR.

        var bit = context.Select(
            context.TypeU32(),
            predicate,
            context.Constant(context.TypeU32(), 1),
            context.Constant(context.TypeU32(), 0));

        // Split the lane id into a bit position inside a 32-bit word (low 5 bits)
        // and the index of the word that bit belongs to (remaining high bits).
        var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
        var threadIdLow = context.BitwiseAnd(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 0x1f));
        var threadIdHigh = context.ShiftRightLogical(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 5));
        var bitMask = context.ShiftLeftLogical(context.TypeU32(), bit, threadIdLow);

        // Only invocations whose word index matches the requested word contribute to the sum.
        var isGroup = context.IEqual(context.TypeBool(), threadIdHigh, context.Constant(context.TypeU32(), operation.Index));
        bitMask = context.Select(context.TypeU32(), isGroup, bitMask, context.Constant(context.TypeU32(), 0));

        var mask = context.GroupNonUniformIAdd(
            context.TypeU32(),
            context.Constant(context.TypeU32(), Scope.Subgroup),
            GroupOperation.Reduce,
            bitMask);

        return new OperationResult(AggregateType.U32, mask);
    }
    else
    {
        // Native path: perform the ballot and extract the requested word from the 4-component mask.
        var uvec4Type = context.TypeVector(context.TypeU32(), 4);
        var execution = context.Constant(context.TypeU32(), Scope.Subgroup);

        var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, predicate);
        var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index);

        return new OperationResult(AggregateType.U32, mask);
    }
}
|
||||
|
||||
private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)
|
||||
|
@ -28,7 +28,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
||||
_poolLock = new object();
|
||||
}
|
||||
|
||||
// Helper function flags that are combined under the "needs invocation id" mask.
// NOTE(review): Ballot is presumably listed because the emulated ballot path reads the
// subgroup lane id; confirm against the code that consumes this mask.
private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd | HelperFunctionsMask.Ballot;
|
||||
|
||||
public static byte[] Generate(StructuredProgramInfo info, CodeGenParameters parameters)
|
||||
{
|
||||
@ -51,6 +51,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
||||
context.AddCapability(Capability.ImageQuery);
|
||||
context.AddCapability(Capability.SampledBuffer);
|
||||
|
||||
if (info.HelperFunctionsMask.HasFlag(HelperFunctionsMask.Ballot) && !context.HostCapabilities.SupportsShaderBallotDivergence)
|
||||
{
|
||||
// Ballots might be emulated with subgroupAdd in those cases.
|
||||
context.AddCapability(Capability.GroupNonUniformArithmetic);
|
||||
}
|
||||
|
||||
if (parameters.Definitions.TransformFeedbackEnabled && parameters.Definitions.LastInVertexPipeline)
|
||||
{
|
||||
context.AddCapability(Capability.TransformFeedback);
|
||||
|
@ -312,6 +312,15 @@ namespace Ryujinx.Graphics.Shader
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <summary>
/// Queries whether the host GPU can execute shader ballot instructions on divergent control flow paths.
/// This default implementation always reports support.
/// </summary>
/// <returns>True if ballot instructions are supported on divergent control flow paths, false otherwise</returns>
bool QueryHostSupportsShaderBallotDivergence() => true;
|
||||
|
||||
/// <summary>
|
||||
/// Queries host GPU shader support for barrier instructions on divergent control flow paths.
|
||||
/// </summary>
|
||||
|
@ -9,5 +9,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
|
||||
MultiplyHighU32 = 1 << 3,
|
||||
SwizzleAdd = 1 << 10,
|
||||
FSI = 1 << 11,
|
||||
Ballot = 1 << 12,
|
||||
}
|
||||
}
|
||||
|
@ -328,6 +328,9 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
|
||||
case Instruction.FSIEnd:
|
||||
context.Info.HelperFunctionsMask |= HelperFunctionsMask.FSI;
|
||||
break;
|
||||
case Instruction.Ballot:
|
||||
context.Info.HelperFunctionsMask |= HelperFunctionsMask.Ballot;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
||||
public readonly bool SupportsFragmentShaderOrderingIntel;
|
||||
public readonly bool SupportsGeometryShaderPassthrough;
|
||||
public readonly bool SupportsShaderBallot;
|
||||
public readonly bool SupportsShaderBallotDivergence;
|
||||
public readonly bool SupportsShaderBarrierDivergence;
|
||||
public readonly bool SupportsTextureShadowLod;
|
||||
public readonly bool SupportsViewportMask;
|
||||
@ -17,6 +18,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
||||
bool supportsFragmentShaderOrderingIntel,
|
||||
bool supportsGeometryShaderPassthrough,
|
||||
bool supportsShaderBallot,
|
||||
bool supportsShaderBallotDivergence,
|
||||
bool supportsShaderBarrierDivergence,
|
||||
bool supportsTextureShadowLod,
|
||||
bool supportsViewportMask)
|
||||
@ -26,6 +28,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
||||
SupportsFragmentShaderOrderingIntel = supportsFragmentShaderOrderingIntel;
|
||||
SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
|
||||
SupportsShaderBallot = supportsShaderBallot;
|
||||
SupportsShaderBallotDivergence = supportsShaderBallotDivergence;
|
||||
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
|
||||
SupportsTextureShadowLod = supportsTextureShadowLod;
|
||||
SupportsViewportMask = supportsViewportMask;
|
||||
|
@ -361,6 +361,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
||||
GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel(),
|
||||
GpuAccessor.QueryHostSupportsGeometryShaderPassthrough(),
|
||||
GpuAccessor.QueryHostSupportsShaderBallot(),
|
||||
GpuAccessor.QueryHostSupportsShaderBallotDivergence(),
|
||||
GpuAccessor.QueryHostSupportsShaderBarrierDivergence(),
|
||||
GpuAccessor.QueryHostSupportsTextureShadowLod(),
|
||||
GpuAccessor.QueryHostSupportsViewportMask());
|
||||
|
@ -616,6 +616,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
supportsNonConstantTextureOffset: false,
|
||||
supportsScaledVertexFormats: FormatCapabilities.SupportsScaledVertexFormats(),
|
||||
supportsShaderBallot: false,
|
||||
supportsShaderBallotDivergence: Vendor != Vendor.Qualcomm,
|
||||
supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
|
||||
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
|
||||
supportsTextureGatherOffsets: features2.Features.ShaderImageGatherExtended && !IsMoltenVk,
|
||||
|
Loading…
x
Reference in New Issue
Block a user