Unverified Commit 639f5461 by greg-lunarg Committed by GitHub

New nonuniform analysis (#2457)

This implements a new nonunifom analysis suggested by @jbolz. This change generates nonUniform decorations that were previously missing and avoids generation of incorrect decorations. Most notably, it now generates decorations for nonuniform functions and out params. It avoids generating decorations for lvalues which themselves are not nonuniform.
parent 74e8f05b
......@@ -2798,8 +2798,9 @@ void Builder::accessChainStore(Id rvalue, Decoration nonUniform, spv::MemoryAcce
}
// Comments in header
Id Builder::accessChainLoad(Decoration precision, Decoration nonUniform, Id resultType,
spv::MemoryAccessMask memoryAccess, spv::Scope scope, unsigned int alignment)
Id Builder::accessChainLoad(Decoration precision, Decoration l_nonUniform,
Decoration r_nonUniform, Id resultType, spv::MemoryAccessMask memoryAccess,
spv::Scope scope, unsigned int alignment)
{
Id id;
......@@ -2863,9 +2864,9 @@ Id Builder::accessChainLoad(Decoration precision, Decoration nonUniform, Id resu
// Buffer accesses need the access chain decorated, and this is where
// loaded image types get decorated. TODO: This should maybe move to
// createImageTextureFunctionCall.
addDecoration(id, nonUniform);
addDecoration(id, l_nonUniform);
id = createLoad(id, precision, memoryAccess, scope, alignment);
addDecoration(id, nonUniform);
addDecoration(id, r_nonUniform);
}
// Done, unless there are swizzles to do
......@@ -2886,7 +2887,7 @@ Id Builder::accessChainLoad(Decoration precision, Decoration nonUniform, Id resu
if (accessChain.component != NoResult)
id = setPrecision(createVectorExtractDynamic(id, resultType, accessChain.component), precision);
addDecoration(id, nonUniform);
addDecoration(id, r_nonUniform);
return id;
}
......
......@@ -625,6 +625,7 @@ public:
CoherentFlags operator |=(const CoherentFlags &other) { return *this; }
#else
bool isVolatile() const { return volatil; }
bool isNonUniform() const { return nonUniform; }
bool anyCoherent() const {
return coherent || devicecoherent || queuefamilycoherent || workgroupcoherent ||
subgroupcoherent || shadercallcoherent;
......@@ -639,6 +640,7 @@ public:
unsigned nonprivate : 1;
unsigned volatil : 1;
unsigned isImage : 1;
unsigned nonUniform : 1;
void clear() {
coherent = 0;
......@@ -650,6 +652,7 @@ public:
nonprivate = 0;
volatil = 0;
isImage = 0;
nonUniform = 0;
}
CoherentFlags operator |=(const CoherentFlags &other) {
......@@ -662,6 +665,7 @@ public:
nonprivate |= other.nonprivate;
volatil |= other.volatil;
isImage |= other.isImage;
nonUniform |= other.nonUniform;
return *this;
}
#endif
......@@ -727,7 +731,7 @@ public:
spv::Scope scope = spv::ScopeMax, unsigned int alignment = 0);
// use accessChain and swizzle to load an r-value
Id accessChainLoad(Decoration precision, Decoration nonUniform, Id ResultType,
Id accessChainLoad(Decoration precision, Decoration l_nonUniform, Decoration r_nonUniform, Id ResultType,
spv::MemoryAccessMask memoryAccess = spv::MemoryAccessMaskNone, spv::Scope scope = spv::ScopeMax,
unsigned int alignment = 0);
......
......@@ -23,6 +23,7 @@ spv.nonuniform4.frag
Decorate 13(rIndex) Flat
Decorate 13(rIndex) Location 3
Decorate 15 DecorationNonUniformEXT
Decorate 17 DecorationNonUniformEXT
Decorate 21 DecorationNonUniformEXT
2: TypeVoid
3: TypeFunction 2
......
......@@ -28,10 +28,12 @@ nonuniformEXT int foo(nonuniformEXT int nupi, nonuniformEXT out int f)
void main()
{
nonuniformEXT int nu_li;
nonuniformEXT int nu_li2;
int dyn_i;
int a = foo(nu_li, nu_li);
nu_li = nonuniformEXT(a) + nonuniformEXT(a * 2);
nu_li2 = a + nonuniformEXT(a * 2);
float b;
b = nu_inv4.x * nu_gf;
......@@ -46,16 +48,25 @@ void main()
b += texelFetch(uniformTexelBuffer[nu_ii], 1).x;
b += imageLoad(storageTexelBuffer[nu_ii], 1).x;
b += texture(sampler2D(uniformTexArr[nu_ii], uniformSampler), inTexcoord.xy).x;
b += texture(nonuniformEXT(sampler2D(uniformTexArr[nu_ii], uniformSampler)), inTexcoord.xy).x;
nonuniformEXT ivec4 v;
nonuniformEXT mat4 m;
nonuniformEXT struct S { int a; } s;
nonuniformEXT int arr[10];
ivec4 uv;
mat4 um;
struct US { int a[10]; } us;
int uarr[10];
b += uniformBuffer[v.y].a;
b += uniformBuffer[v[2]].a;
b += uniformBuffer[uv[nu_ii]].a;
b += uniformBuffer[int(m[2].z)].a;
b += uniformBuffer[s.a].a;
b += uniformBuffer[arr[2]].a;
b += uniformBuffer[int(um[nu_ii].z)].a;
b += uniformBuffer[us.a[nu_ii]].a;
b += uniformBuffer[uarr[nu_ii]].a;
storageBuffer[nu_ii].b = b;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment