Archived
1
0
forked from MeloNX/MeloNX

Add spin lock to prevent waiting for fences on multiple threads at once on Adreno

Support ballot operations with divergent control flow on Adreno

Extend Adreno binding workaround to buffer textures
This commit is contained in:
Gabriel A 2023-08-21 19:39:25 -03:00 committed by Emmanuel Hansen
parent 7cfd5dc902
commit 9eaa683ae4
12 changed files with 75 additions and 9 deletions

View File

@ -40,6 +40,7 @@ namespace Ryujinx.Graphics.GAL
public readonly bool SupportsQuads;
public readonly bool SupportsSeparateSampler;
public readonly bool SupportsShaderBallot;
public readonly bool SupportsShaderBallotDivergence;
public readonly bool SupportsShaderBarrierDivergence;
public readonly bool SupportsShaderFloat64;
public readonly bool SupportsTextureGatherOffsets;
@ -106,6 +107,7 @@ namespace Ryujinx.Graphics.GAL
bool supportsQuads,
bool supportsSeparateSampler,
bool supportsShaderBallot,
bool supportsShaderBallotDivergence,
bool supportsShaderBarrierDivergence,
bool supportsShaderFloat64,
bool supportsTextureGatherOffsets,
@ -167,6 +169,7 @@ namespace Ryujinx.Graphics.GAL
SupportsQuads = supportsQuads;
SupportsSeparateSampler = supportsSeparateSampler;
SupportsShaderBallot = supportsShaderBallot;
SupportsShaderBallotDivergence = supportsShaderBallotDivergence;
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
SupportsShaderFloat64 = supportsShaderFloat64;
SupportsTextureGatherOffsets = supportsTextureGatherOffsets;

View File

@ -227,6 +227,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <summary>
/// Queries whether the host reports support for shader ballot operations.
/// </summary>
/// <returns>The <c>SupportsShaderBallot</c> flag read from <c>_context.Capabilities</c></returns>
public bool QueryHostSupportsShaderBallot()
{
    return _context.Capabilities.SupportsShaderBallot;
}
/// <summary>
/// Queries whether the host reports support for ballot instructions on divergent control flow paths.
/// </summary>
/// <returns>The <c>SupportsShaderBallotDivergence</c> flag read from <c>_context.Capabilities</c></returns>
public bool QueryHostSupportsShaderBallotDivergence()
{
    return _context.Capabilities.SupportsShaderBallotDivergence;
}
/// <summary>
/// Queries whether the host reports support for barrier instructions on divergent control flow paths.
/// </summary>
/// <returns>The <c>SupportsShaderBarrierDivergence</c> flag read from <c>_context.Capabilities</c></returns>
public bool QueryHostSupportsShaderBarrierDivergence()
{
    return _context.Capabilities.SupportsShaderBarrierDivergence;
}
/// <summary>
/// Queries whether the host reports support for 64-bit floating point in shaders.
/// </summary>
/// <returns>The <c>SupportsShaderFloat64</c> flag read from <c>_context.Capabilities</c></returns>
public bool QueryHostSupportsShaderFloat64()
{
    return _context.Capabilities.SupportsShaderFloat64;
}

View File

@ -177,6 +177,7 @@ namespace Ryujinx.Graphics.OpenGL
supportsQuads: HwCapabilities.SupportsQuads,
supportsSeparateSampler: false,
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
supportsShaderBallotDivergence: true,
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
supportsShaderFloat64: true,
supportsTextureGatherOffsets: true,

View File

@ -227,14 +227,44 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
/// <summary>
/// Generates the SPIR-V code for a ballot operation, producing one 32-bit word of the subgroup ballot mask.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="operation">Ballot operation; source 0 is the predicate and <c>Index</c> selects the 32-bit mask word</param>
/// <returns>The selected ballot mask word, typed as U32</returns>
private static OperationResult GenerateBallot(CodeGenContext context, AstOperation operation)
{
    var source = operation.GetSource(0);
    var predicate = context.Get(AggregateType.Bool, source);

    // The emulation is only needed when the host lacks divergent ballot support AND this ballot
    // is not at the top level of the main function (nested block, after a possible return, or in a helper function).
    if (!context.HostCapabilities.SupportsShaderBallotDivergence &&
        (context.CurrentBlock.Type != AstBlockType.Main || context.MayHaveReturned || !context.IsMainFunction))
    {
        // If divergent ballot is not supported, we can emulate it with a subgroupAdd operation,
        // where we add a bit mask with a unique bit set for each subgroup invocation.
        var bit = context.Select(
            context.TypeU32(),
            predicate,
            context.Constant(context.TypeU32(), 1),
            context.Constant(context.TypeU32(), 0));

        // Split the lane ID into the bit position within a 32-bit word (low 5 bits)
        // and the index of the word that bit belongs to (remaining high bits).
        var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
        var threadIdLow = context.BitwiseAnd(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 0x1f));
        var threadIdHigh = context.ShiftRightLogical(context.TypeU32(), threadId, context.Constant(context.TypeU32(), 5));

        // Only invocations whose word index matches the requested one contribute their bit; the rest contribute 0.
        var bitMask = context.ShiftLeftLogical(context.TypeU32(), bit, threadIdLow);
        var isGroup = context.IEqual(context.TypeBool(), threadIdHigh, context.Constant(context.TypeU32(), operation.Index));
        bitMask = context.Select(context.TypeU32(), isGroup, bitMask, context.Constant(context.TypeU32(), 0));

        // Each contributing invocation sets a distinct bit, so summing the masks never carries
        // and the reduction yields the combined bit mask.
        var mask = context.GroupNonUniformIAdd(
            context.TypeU32(),
            context.Constant(context.TypeU32(), Scope.Subgroup),
            GroupOperation.Reduce,
            bitMask);

        return new OperationResult(AggregateType.U32, mask);
    }
    else
    {
        // Native path: ballot yields a uvec4, from which the requested 32-bit word is extracted.
        var uvec4Type = context.TypeVector(context.TypeU32(), 4);
        var execution = context.Constant(context.TypeU32(), Scope.Subgroup);

        var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, predicate);
        var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index);

        return new OperationResult(AggregateType.U32, mask);
    }
}
private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)

View File

@ -28,7 +28,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
_poolLock = new object();
}
// Helper functions flagged as needing the invocation ID. Ballot is included alongside SwizzleAdd,
// presumably because the emulated ballot path reads the subgroup lane ID — confirm against the input declaration code.
private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd | HelperFunctionsMask.Ballot;
public static byte[] Generate(StructuredProgramInfo info, CodeGenParameters parameters)
{
@ -60,6 +60,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
context.AddCapability(Capability.Float64);
}
if (info.HelperFunctionsMask.HasFlag(HelperFunctionsMask.Ballot) && !context.HostCapabilities.SupportsShaderBallotDivergence)
{
// Ballots might be emulated with subgroupAdd in those cases.
context.AddCapability(Capability.GroupNonUniformArithmetic);
}
if (parameters.Definitions.TransformFeedbackEnabled && parameters.Definitions.LastInVertexPipeline)
{
context.AddCapability(Capability.TransformFeedback);

View File

@ -319,6 +319,15 @@ namespace Ryujinx.Graphics.Shader
return true;
}
/// <summary>
/// Queries whether host GPU shaders can execute ballot instructions on divergent control flow paths.
/// </summary>
/// <remarks>This implementation always returns true.</remarks>
/// <returns>True if ballot instructions are supported on divergent control flow paths, false otherwise</returns>
bool QueryHostSupportsShaderBallotDivergence() => true;
/// <summary>
/// Queries host GPU shader support for barrier instructions on divergent control flow paths.
/// </summary>

View File

@ -9,5 +9,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
MultiplyHighU32 = 1 << 3,
SwizzleAdd = 1 << 10,
FSI = 1 << 11,
Ballot = 1 << 12,
}
}

View File

@ -338,6 +338,9 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
case Instruction.FSIEnd:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.FSI;
break;
case Instruction.Ballot:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.Ballot;
break;
}
}

View File

@ -7,6 +7,7 @@ namespace Ryujinx.Graphics.Shader.Translation
public readonly bool SupportsFragmentShaderOrderingIntel;
public readonly bool SupportsGeometryShaderPassthrough;
public readonly bool SupportsShaderBallot;
public readonly bool SupportsShaderBallotDivergence;
public readonly bool SupportsShaderBarrierDivergence;
public readonly bool SupportsShaderFloat64;
public readonly bool SupportsTextureShadowLod;
@ -18,6 +19,7 @@ namespace Ryujinx.Graphics.Shader.Translation
bool supportsFragmentShaderOrderingIntel,
bool supportsGeometryShaderPassthrough,
bool supportsShaderBallot,
bool supportsShaderBallotDivergence,
bool supportsShaderBarrierDivergence,
bool supportsShaderFloat64,
bool supportsTextureShadowLod,
@ -28,6 +30,7 @@ namespace Ryujinx.Graphics.Shader.Translation
SupportsFragmentShaderOrderingIntel = supportsFragmentShaderOrderingIntel;
SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
SupportsShaderBallot = supportsShaderBallot;
SupportsShaderBallotDivergence = supportsShaderBallotDivergence;
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
SupportsShaderFloat64 = supportsShaderFloat64;
SupportsTextureShadowLod = supportsTextureShadowLod;

View File

@ -362,6 +362,7 @@ namespace Ryujinx.Graphics.Shader.Translation
GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel(),
GpuAccessor.QueryHostSupportsGeometryShaderPassthrough(),
GpuAccessor.QueryHostSupportsShaderBallot(),
GpuAccessor.QueryHostSupportsShaderBallotDivergence(),
GpuAccessor.QueryHostSupportsShaderBarrierDivergence(),
GpuAccessor.QueryHostSupportsShaderFloat64(),
GpuAccessor.QueryHostSupportsTextureShadowLod(),

View File

@ -1,4 +1,4 @@
using Ryujinx.Common.Logging;
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Silk.NET.Vulkan;
using System;
@ -290,7 +290,7 @@ namespace Ryujinx.Graphics.Vulkan
return segments;
}
private static ResourceBindingSegment[][] BuildBindingSegments(ReadOnlyCollection<ResourceUsageCollection> setUsages, bool hasBatchedTextureSamplerBug, out bool usesBufferTextures)
private static ResourceBindingSegment[][] BuildBindingSegments(ReadOnlyCollection<ResourceUsageCollection> setUsages, bool hasBatchedTextureBug, out bool usesBufferTextures)
{
usesBufferTextures = false;
@ -314,7 +314,7 @@ namespace Ryujinx.Graphics.Vulkan
if (currentUsage.Binding + currentCount != usage.Binding ||
currentUsage.Type != usage.Type ||
(currentUsage.Type == ResourceType.TextureAndSampler && hasBatchedTextureSamplerBug) ||
(IsReadOnlyTexture(currentUsage.Type) && hasBatchedTextureBug) ||
currentUsage.Stages != usage.Stages ||
currentUsage.ArrayLength > 1 ||
usage.ArrayLength > 1)
@ -450,6 +450,12 @@ namespace Ryujinx.Graphics.Vulkan
return (buffer, texture);
}
/// <summary>
/// Checks whether a resource type is <see cref="ResourceType.TextureAndSampler"/> or <see cref="ResourceType.BufferTexture"/>.
/// </summary>
/// <param name="resourceType">Resource type to check</param>
/// <returns>True for the two texture resource types above, false for any other type</returns>
private static bool IsReadOnlyTexture(ResourceType resourceType)
{
    switch (resourceType)
    {
        case ResourceType.TextureAndSampler:
        case ResourceType.BufferTexture:
            return true;
        default:
            return false;
    }
}
private async Task BackgroundCompilation()
{
await Task.WhenAll(_shaders.Select(shader => shader.CompileTask));

View File

@ -758,6 +758,7 @@ namespace Ryujinx.Graphics.Vulkan
supportsQuads: false,
supportsSeparateSampler: true,
supportsShaderBallot: false,
supportsShaderBallotDivergence: Vendor != Vendor.Qualcomm,
supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
supportsTextureGatherOffsets: features2.Features.ShaderImageGatherExtended && !IsMoltenVk,