forked from MeloNX/MeloNX
Support ballot operations with divergent control flow on Adreno
This commit is contained in:
parent
dca4091930
commit
698ac0413b
@ -37,6 +37,7 @@ namespace Ryujinx.Graphics.GAL
|
|||||||
public readonly bool SupportsCubemapView;
|
public readonly bool SupportsCubemapView;
|
||||||
public readonly bool SupportsNonConstantTextureOffset;
|
public readonly bool SupportsNonConstantTextureOffset;
|
||||||
public readonly bool SupportsShaderBallot;
|
public readonly bool SupportsShaderBallot;
|
||||||
|
public readonly bool SupportsShaderBallotDivergence;
|
||||||
public readonly bool SupportsShaderBarrierDivergence;
|
public readonly bool SupportsShaderBarrierDivergence;
|
||||||
public readonly bool SupportsShaderFloat64;
|
public readonly bool SupportsShaderFloat64;
|
||||||
public readonly bool SupportsTextureGatherOffsets;
|
public readonly bool SupportsTextureGatherOffsets;
|
||||||
@ -93,6 +94,7 @@ namespace Ryujinx.Graphics.GAL
|
|||||||
bool supportsCubemapView,
|
bool supportsCubemapView,
|
||||||
bool supportsNonConstantTextureOffset,
|
bool supportsNonConstantTextureOffset,
|
||||||
bool supportsShaderBallot,
|
bool supportsShaderBallot,
|
||||||
|
bool supportsShaderBallotDivergence,
|
||||||
bool supportsShaderBarrierDivergence,
|
bool supportsShaderBarrierDivergence,
|
||||||
bool supportsShaderFloat64,
|
bool supportsShaderFloat64,
|
||||||
bool supportsTextureGatherOffsets,
|
bool supportsTextureGatherOffsets,
|
||||||
@ -145,6 +147,7 @@ namespace Ryujinx.Graphics.GAL
|
|||||||
SupportsCubemapView = supportsCubemapView;
|
SupportsCubemapView = supportsCubemapView;
|
||||||
SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
|
SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
|
||||||
SupportsShaderBallot = supportsShaderBallot;
|
SupportsShaderBallot = supportsShaderBallot;
|
||||||
|
SupportsShaderBallotDivergence = supportsShaderBallotDivergence;
|
||||||
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
|
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
|
||||||
SupportsShaderFloat64 = supportsShaderFloat64;
|
SupportsShaderFloat64 = supportsShaderFloat64;
|
||||||
SupportsTextureGatherOffsets = supportsTextureGatherOffsets;
|
SupportsTextureGatherOffsets = supportsTextureGatherOffsets;
|
||||||
|
@ -180,6 +180,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
|||||||
|
|
||||||
public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
|
public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
|
||||||
|
|
||||||
|
// Returns the SupportsShaderBallotDivergence flag read from the GPU context's capabilities.
public bool QueryHostSupportsShaderBallotDivergence() => _context.Capabilities.SupportsShaderBallotDivergence;
|
||||||
|
|
||||||
public bool QueryHostSupportsShaderBarrierDivergence() => _context.Capabilities.SupportsShaderBarrierDivergence;
|
public bool QueryHostSupportsShaderBarrierDivergence() => _context.Capabilities.SupportsShaderBarrierDivergence;
|
||||||
|
|
||||||
public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64;
|
public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64;
|
||||||
|
@ -167,6 +167,7 @@ namespace Ryujinx.Graphics.OpenGL
|
|||||||
supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
|
supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
|
||||||
supportsScaledVertexFormats: true,
|
supportsScaledVertexFormats: true,
|
||||||
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
|
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
|
||||||
|
supportsShaderBallotDivergence: true,
|
||||||
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
|
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
|
||||||
supportsShaderFloat64: true,
|
supportsShaderFloat64: true,
|
||||||
supportsTextureGatherOffsets: true,
|
supportsTextureGatherOffsets: true,
|
||||||
|
@ -227,14 +227,44 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
|||||||
// Emits the code for a ballot operation and returns, as a U32, the 32-bit word of the
// ballot mask selected by operation.Index. Source 0 of the operation is read as the boolean predicate.
private static OperationResult GenerateBallot(CodeGenContext context, AstOperation operation)
|
private static OperationResult GenerateBallot(CodeGenContext context, AstOperation operation)
|
||||||
{
|
{
|
||||||
var source = operation.GetSource(0);
|
var source = operation.GetSource(0);
|
||||||
|
var predicate = context.Get(AggregateType.Bool, source);
|
||||||
|
|
||||||
var uvec4Type = context.TypeVector(context.TypeU32(), 4);
|
// Emulation path: taken only when the host reports no support for divergent ballots AND the
// ballot is not in a Main-type block of the main function, or a return may already have happened.
if (!context.HostCapabilities.SupportsShaderBallotDivergence &&
|
||||||
var execution = context.Constant(context.TypeU32(), Scope.Subgroup);
|
(context.CurrentBlock.Type != AstBlockType.Main || context.MayHaveReturned || !context.IsMainFunction))
|
||||||
|
{
|
||||||
|
// If divergent ballot is not supported, we can emulate it with a subgroupAdd operation,
|
||||||
|
// where we add a bit mask with a unique bit set for each subgroup invocation.
|
||||||
|
|
||||||
var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, context.Get(AggregateType.Bool, source));
|
// 1 when this invocation's predicate is true, 0 otherwise.
var bit = context.Select(
|
||||||
var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index);
|
context.TypeU32(),
|
||||||
|
predicate,
|
||||||
|
context.Constant(context.TypeU32(), 1),
|
||||||
|
context.Constant(context.TypeU32(), 0));
|
||||||
|
|
||||||
return new OperationResult(AggregateType.U32, mask);
|
// The low 5 bits of the lane ID (& 0x1f) give the bit position inside a 32-bit word;
// the remaining bits (>> 5) give the index of the 32-bit word the lane belongs to.
var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
|
||||||
|
var threadIdLow = context.BitwiseAnd(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 0x1f));
|
||||||
|
var threadIdHigh = context.ShiftRightLogical(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 5));
|
||||||
|
var bitMask = context.ShiftLeftLogical(context.TypeU32(), bit, threadIdLow);
|
||||||
|
// Only lanes whose word index equals operation.Index contribute their bit; all others contribute 0.
var isGroup = context.IEqual(context.TypeBool(), threadIdHigh, context.Constant(context.TypeU32(), operation.Index));
|
||||||
|
bitMask = context.Select(context.TypeU32(), isGroup, bitMask, context.Constant(context.TypeU32(), 0));
|
||||||
|
// Contributing lanes each hold a distinct bit, so the subgroup-wide integer sum of the
// per-lane values yields the same result as OR-ing them together.
var mask = context.GroupNonUniformIAdd(
|
||||||
|
context.TypeU32(),
|
||||||
|
context.Constant(context.TypeU32(), Scope.Subgroup),
|
||||||
|
GroupOperation.Reduce,
|
||||||
|
bitMask);
|
||||||
|
|
||||||
|
return new OperationResult(AggregateType.U32, mask);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Native path: the ballot produces a 4-component U32 vector at subgroup scope, and the
// component selected by operation.Index is extracted as the result.
var uvec4Type = context.TypeVector(context.TypeU32(), 4);
|
||||||
|
var execution = context.Constant(context.TypeU32(), Scope.Subgroup);
|
||||||
|
|
||||||
|
var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, predicate);
|
||||||
|
var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index);
|
||||||
|
|
||||||
|
return new OperationResult(AggregateType.U32, mask);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)
|
private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)
|
||||||
|
@ -28,7 +28,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
|||||||
_poolLock = new object();
|
_poolLock = new object();
|
||||||
}
|
}
|
||||||
|
|
||||||
private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd;
|
private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd | HelperFunctionsMask.Ballot;
|
||||||
|
|
||||||
public static byte[] Generate(StructuredProgramInfo info, CodeGenParameters parameters)
|
public static byte[] Generate(StructuredProgramInfo info, CodeGenParameters parameters)
|
||||||
{
|
{
|
||||||
@ -51,6 +51,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
|||||||
context.AddCapability(Capability.ImageQuery);
|
context.AddCapability(Capability.ImageQuery);
|
||||||
context.AddCapability(Capability.SampledBuffer);
|
context.AddCapability(Capability.SampledBuffer);
|
||||||
|
|
||||||
|
if (info.HelperFunctionsMask.HasFlag(HelperFunctionsMask.Ballot) && !context.HostCapabilities.SupportsShaderBallotDivergence)
|
||||||
|
{
|
||||||
|
// Ballots might be emulated with subgroupAdd in those cases.
|
||||||
|
context.AddCapability(Capability.GroupNonUniformArithmetic);
|
||||||
|
}
|
||||||
|
|
||||||
if (parameters.Definitions.TransformFeedbackEnabled && parameters.Definitions.LastInVertexPipeline)
|
if (parameters.Definitions.TransformFeedbackEnabled && parameters.Definitions.LastInVertexPipeline)
|
||||||
{
|
{
|
||||||
context.AddCapability(Capability.TransformFeedback);
|
context.AddCapability(Capability.TransformFeedback);
|
||||||
|
@ -312,6 +312,15 @@ namespace Ryujinx.Graphics.Shader
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Queries host GPU shader support for ballot instructions on divergent control flow paths.
|
||||||
|
/// </summary>
|
||||||
|
/// <returns>True if the GPU supports ballot instructions on divergent control flow paths, false otherwise</returns>
|
||||||
|
bool QueryHostSupportsShaderBallotDivergence()
|
||||||
|
{
|
||||||
|
// This body unconditionally reports support.
// NOTE(review): presumably a default that host-backed implementations override - confirm against implementers.
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Queries host GPU shader support for barrier instructions on divergent control flow paths.
|
/// Queries host GPU shader support for barrier instructions on divergent control flow paths.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
@ -9,5 +9,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
|
|||||||
MultiplyHighU32 = 1 << 3,
|
MultiplyHighU32 = 1 << 3,
|
||||||
SwizzleAdd = 1 << 10,
|
SwizzleAdd = 1 << 10,
|
||||||
FSI = 1 << 11,
|
FSI = 1 << 11,
|
||||||
|
Ballot = 1 << 12,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -328,6 +328,9 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
|
|||||||
case Instruction.FSIEnd:
|
case Instruction.FSIEnd:
|
||||||
context.Info.HelperFunctionsMask |= HelperFunctionsMask.FSI;
|
context.Info.HelperFunctionsMask |= HelperFunctionsMask.FSI;
|
||||||
break;
|
break;
|
||||||
|
case Instruction.Ballot:
|
||||||
|
context.Info.HelperFunctionsMask |= HelperFunctionsMask.Ballot;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||||||
public readonly bool SupportsFragmentShaderOrderingIntel;
|
public readonly bool SupportsFragmentShaderOrderingIntel;
|
||||||
public readonly bool SupportsGeometryShaderPassthrough;
|
public readonly bool SupportsGeometryShaderPassthrough;
|
||||||
public readonly bool SupportsShaderBallot;
|
public readonly bool SupportsShaderBallot;
|
||||||
|
public readonly bool SupportsShaderBallotDivergence;
|
||||||
public readonly bool SupportsShaderBarrierDivergence;
|
public readonly bool SupportsShaderBarrierDivergence;
|
||||||
public readonly bool SupportsTextureShadowLod;
|
public readonly bool SupportsTextureShadowLod;
|
||||||
public readonly bool SupportsViewportMask;
|
public readonly bool SupportsViewportMask;
|
||||||
@ -17,6 +18,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||||||
bool supportsFragmentShaderOrderingIntel,
|
bool supportsFragmentShaderOrderingIntel,
|
||||||
bool supportsGeometryShaderPassthrough,
|
bool supportsGeometryShaderPassthrough,
|
||||||
bool supportsShaderBallot,
|
bool supportsShaderBallot,
|
||||||
|
bool supportsShaderBallotDivergence,
|
||||||
bool supportsShaderBarrierDivergence,
|
bool supportsShaderBarrierDivergence,
|
||||||
bool supportsTextureShadowLod,
|
bool supportsTextureShadowLod,
|
||||||
bool supportsViewportMask)
|
bool supportsViewportMask)
|
||||||
@ -26,6 +28,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||||||
SupportsFragmentShaderOrderingIntel = supportsFragmentShaderOrderingIntel;
|
SupportsFragmentShaderOrderingIntel = supportsFragmentShaderOrderingIntel;
|
||||||
SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
|
SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
|
||||||
SupportsShaderBallot = supportsShaderBallot;
|
SupportsShaderBallot = supportsShaderBallot;
|
||||||
|
SupportsShaderBallotDivergence = supportsShaderBallotDivergence;
|
||||||
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
|
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
|
||||||
SupportsTextureShadowLod = supportsTextureShadowLod;
|
SupportsTextureShadowLod = supportsTextureShadowLod;
|
||||||
SupportsViewportMask = supportsViewportMask;
|
SupportsViewportMask = supportsViewportMask;
|
||||||
|
@ -361,6 +361,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||||||
GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel(),
|
GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel(),
|
||||||
GpuAccessor.QueryHostSupportsGeometryShaderPassthrough(),
|
GpuAccessor.QueryHostSupportsGeometryShaderPassthrough(),
|
||||||
GpuAccessor.QueryHostSupportsShaderBallot(),
|
GpuAccessor.QueryHostSupportsShaderBallot(),
|
||||||
|
GpuAccessor.QueryHostSupportsShaderBallotDivergence(),
|
||||||
GpuAccessor.QueryHostSupportsShaderBarrierDivergence(),
|
GpuAccessor.QueryHostSupportsShaderBarrierDivergence(),
|
||||||
GpuAccessor.QueryHostSupportsTextureShadowLod(),
|
GpuAccessor.QueryHostSupportsTextureShadowLod(),
|
||||||
GpuAccessor.QueryHostSupportsViewportMask());
|
GpuAccessor.QueryHostSupportsViewportMask());
|
||||||
|
@ -616,6 +616,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||||||
supportsNonConstantTextureOffset: false,
|
supportsNonConstantTextureOffset: false,
|
||||||
supportsScaledVertexFormats: FormatCapabilities.SupportsScaledVertexFormats(),
|
supportsScaledVertexFormats: FormatCapabilities.SupportsScaledVertexFormats(),
|
||||||
supportsShaderBallot: false,
|
supportsShaderBallot: false,
|
||||||
|
supportsShaderBallotDivergence: Vendor != Vendor.Qualcomm,
|
||||||
supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
|
supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
|
||||||
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
|
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
|
||||||
supportsTextureGatherOffsets: features2.Features.ShaderImageGatherExtended && !IsMoltenVk,
|
supportsTextureGatherOffsets: features2.Features.ShaderImageGatherExtended && !IsMoltenVk,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user