Commit 4b74373a by Nicolas Capens Committed by Nicolas Capens

Implement dynamic indexing of temporaries.

Previously only dynamic indexing of uniforms was supported. Since dynamic indexing of temporaries is essentially a gather operation within the register file, it is slow. We optimize the common case of using the loop index variable as the relative address, where the index value is the same for all shader invocations running in lock-step across SIMD lanes.

Bug: chromium:845103
Bug: skia:7846
Change-Id: Idb36b512dd560d740ac9088691b633ff3a1561c1
Reviewed-on: https://swiftshader-review.googlesource.com/18968
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
parent 5bff4059
......@@ -1831,6 +1831,11 @@ namespace glsl
return false;
}
if(loop.isDeterministic())
{
deterministicVariables.insert(loop.index->getId());
}
bool unroll = (loop.iterations <= 4);
TIntermNode *init = node->getInit();
......@@ -1916,6 +1921,11 @@ namespace glsl
}
}
if(loop.isDeterministic())
{
deterministicVariables.erase(loop.index->getId());
}
return false;
}
......@@ -2651,10 +2661,12 @@ namespace glsl
sw::Shader::SourceParameter relativeRegister;
source(relativeRegister, right);
int indexId = right->getAsSymbolNode() ? right->getAsSymbolNode()->getId() : 0;
rel.index = relativeRegister.index;
rel.type = relativeRegister.type;
rel.scale = scale;
rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform);
rel.dynamic = (right->getQualifier() != EvqUniform) && (deterministicVariables.count(indexId) == 0);
}
}
else if(rel.index != registerIndex(&address)) // Move the previous index register to the address register
......
......@@ -374,6 +374,8 @@ namespace glsl
TQualifier outputQualifier;
std::set<int> deterministicVariables;
TParseContext &mContext;
};
......
......@@ -1822,6 +1822,16 @@ namespace sw
// RValue<Bool> operator!=(RValue<Int4> lhs, RValue<Int4> rhs);
// RValue<Bool> operator==(RValue<Int4> lhs, RValue<Int4> rhs);
// Mixed scalar/vector addition: the Int left operand is first converted to
// an Int4, then added to the Int4 right operand.
// NOTE(review): Int4(Int) presumably replicates the scalar into every
// component - confirm against the Int4 constructor.
inline RValue<Int4> operator+(RValue<Int> lhs, RValue<Int4> rhs)
{
	Int4 widened(lhs);

	return widened + rhs;
}
// Mixed vector/scalar addition: the Int right operand is first converted to
// an Int4, then added to the Int4 left operand.
// NOTE(review): Int4(Int) presumably replicates the scalar into every
// component - confirm against the Int4 constructor.
inline RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int> rhs)
{
	Int4 widened(rhs);

	return lhs + widened;
}
RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y);
RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y);
RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y);
......
......@@ -366,14 +366,23 @@ namespace sw
if(dst.z) pDst.z = r[dst.index].z;
if(dst.w) pDst.w = r[dst.index].w;
}
else if(!dst.rel.dynamic)
{
Int a = dst.index + relativeAddress(dst.rel);
if(dst.x) pDst.x = r[a].x;
if(dst.y) pDst.y = r[a].y;
if(dst.z) pDst.z = r[a].z;
if(dst.w) pDst.w = r[a].w;
}
else
{
Int a = relativeAddress(dst);
Int4 a = dst.index + dynamicAddress(dst.rel);
if(dst.x) pDst.x = r[dst.index + a].x;
if(dst.y) pDst.y = r[dst.index + a].y;
if(dst.z) pDst.z = r[dst.index + a].z;
if(dst.w) pDst.w = r[dst.index + a].w;
if(dst.x) pDst.x = r[a].x;
if(dst.y) pDst.y = r[a].y;
if(dst.z) pDst.z = r[a].z;
if(dst.w) pDst.w = r[a].w;
}
break;
case Shader::PARAMETER_COLOROUT:
......@@ -384,9 +393,18 @@ namespace sw
if(dst.z) pDst.z = oC[dst.index].z;
if(dst.w) pDst.w = oC[dst.index].w;
}
else if(!dst.rel.dynamic)
{
Int a = dst.index + relativeAddress(dst.rel);
if(dst.x) pDst.x = oC[a].x;
if(dst.y) pDst.y = oC[a].y;
if(dst.z) pDst.z = oC[a].z;
if(dst.w) pDst.w = oC[a].w;
}
else
{
Int a = relativeAddress(dst) + dst.index;
Int4 a = dst.index + dynamicAddress(dst.rel);
if(dst.x) pDst.x = oC[a].x;
if(dst.y) pDst.y = oC[a].y;
......@@ -460,14 +478,23 @@ namespace sw
if(dst.z) r[dst.index].z = d.z;
if(dst.w) r[dst.index].w = d.w;
}
else if(!dst.rel.dynamic)
{
Int a = dst.index + relativeAddress(dst.rel);
if(dst.x) r[a].x = d.x;
if(dst.y) r[a].y = d.y;
if(dst.z) r[a].z = d.z;
if(dst.w) r[a].w = d.w;
}
else
{
Int a = relativeAddress(dst);
Int4 a = dst.index + dynamicAddress(dst.rel);
if(dst.x) r[dst.index + a].x = d.x;
if(dst.y) r[dst.index + a].y = d.y;
if(dst.z) r[dst.index + a].z = d.z;
if(dst.w) r[dst.index + a].w = d.w;
if(dst.x) r.scatter_x(a, d.x);
if(dst.y) r.scatter_y(a, d.y);
if(dst.z) r.scatter_z(a, d.z);
if(dst.w) r.scatter_w(a, d.w);
}
break;
case Shader::PARAMETER_COLOROUT:
......@@ -475,20 +502,30 @@ namespace sw
{
broadcastColor0 = (dst.index == 0) && broadcastColor0;
if(dst.x) { oC[dst.index].x = d.x; }
if(dst.y) { oC[dst.index].y = d.y; }
if(dst.z) { oC[dst.index].z = d.z; }
if(dst.w) { oC[dst.index].w = d.w; }
if(dst.x) oC[dst.index].x = d.x;
if(dst.y) oC[dst.index].y = d.y;
if(dst.z) oC[dst.index].z = d.z;
if(dst.w) oC[dst.index].w = d.w;
}
else if(!dst.rel.dynamic)
{
broadcastColor0 = false;
Int a = dst.index + relativeAddress(dst.rel);
if(dst.x) oC[a].x = d.x;
if(dst.y) oC[a].y = d.y;
if(dst.z) oC[a].z = d.z;
if(dst.w) oC[a].w = d.w;
}
else
{
broadcastColor0 = false;
Int a = relativeAddress(dst) + dst.index;
Int4 a = dst.index + dynamicAddress(dst.rel);
if(dst.x) { oC[a].x = d.x; }
if(dst.y) { oC[a].y = d.y; }
if(dst.z) { oC[a].z = d.z; }
if(dst.w) { oC[a].w = d.w; }
if(dst.x) oC.scatter_x(a, d.x);
if(dst.y) oC.scatter_y(a, d.y);
if(dst.z) oC.scatter_z(a, d.z);
if(dst.w) oC.scatter_w(a, d.w);
}
break;
case Shader::PARAMETER_PREDICATE:
......@@ -826,25 +863,27 @@ namespace sw
{
reg = r[i];
}
else if(!src.rel.dynamic)
{
reg = r[i + relativeAddress(src.rel, src.bufferIndex)];
}
else
{
Int a = relativeAddress(src, src.bufferIndex);
reg = r[i + a];
reg = r[i + dynamicAddress(src.rel)];
}
break;
case Shader::PARAMETER_INPUT:
if(src.rel.type == Shader::PARAMETER_VOID) // Not relative
{
if(src.rel.type == Shader::PARAMETER_VOID) // Not relative
{
reg = v[i];
}
else
{
Int a = relativeAddress(src, src.bufferIndex);
reg = v[i + a];
}
reg = v[i];
}
else if(!src.rel.dynamic)
{
reg = v[i + relativeAddress(src.rel, src.bufferIndex)];
}
else
{
reg = v[i + dynamicAddress(src.rel)];
}
break;
case Shader::PARAMETER_CONST:
......@@ -883,11 +922,13 @@ namespace sw
{
reg = oC[i];
}
else if(!src.rel.dynamic)
{
reg = oC[i + relativeAddress(src.rel, src.bufferIndex)];
}
else
{
Int a = relativeAddress(src, src.bufferIndex);
reg = oC[i + a];
reg = oC[i + dynamicAddress(src.rel)];
}
break;
case Shader::PARAMETER_DEPTHOUT:
......@@ -995,11 +1036,11 @@ namespace sw
}
}
}
else if(src.rel.type == Shader::PARAMETER_LOOP)
else if(!src.rel.dynamic || src.rel.type == Shader::PARAMETER_LOOP)
{
Int loopCounter = aL[loopDepth];
Int a = relativeAddress(src.rel, src.bufferIndex);
c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter));
c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
c.x = c.x.xxxx;
c.y = c.y.yyyy;
......@@ -1008,40 +1049,67 @@ namespace sw
}
else
{
Int a = relativeAddress(src, src.bufferIndex);
int component = src.rel.swizzle & 0x03;
Float4 a;
c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
switch(src.rel.type)
{
case Shader::PARAMETER_TEMP: a = r[src.rel.index][component]; break;
case Shader::PARAMETER_INPUT: a = v[src.rel.index][component]; break;
case Shader::PARAMETER_OUTPUT: a = oC[src.rel.index][component]; break;
case Shader::PARAMETER_CONST: a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
case Shader::PARAMETER_MISCTYPE:
switch(src.rel.index)
{
case Shader::VPosIndex: a = vPos.x; break;
case Shader::VFaceIndex: a = vFace.x; break;
default: ASSERT(false);
}
break;
default: ASSERT(false);
}
c.x = c.x.xxxx;
c.y = c.y.yyyy;
c.z = c.z.zzzz;
c.w = c.w.wwww;
Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);
index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS)); // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}
Int index0 = Extract(index, 0);
Int index1 = Extract(index, 1);
Int index2 = Extract(index, 2);
Int index3 = Extract(index, 3);
c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16);
c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16);
c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16);
c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16);
transpose4x4(c.x, c.y, c.z, c.w);
}
return c;
}
Int PixelProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex)
Int PixelProgram::relativeAddress(const Shader::Relative &rel, int bufferIndex)
{
ASSERT(var.rel.deterministic);
ASSERT(!rel.dynamic);
if(var.rel.type == Shader::PARAMETER_TEMP)
if(rel.type == Shader::PARAMETER_TEMP)
{
return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale;
return As<Int>(Extract(r[rel.index].x, 0)) * rel.scale;
}
else if(var.rel.type == Shader::PARAMETER_INPUT)
else if(rel.type == Shader::PARAMETER_INPUT)
{
return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale;
return As<Int>(Extract(v[rel.index].x, 0)) * rel.scale;
}
else if(var.rel.type == Shader::PARAMETER_OUTPUT)
else if(rel.type == Shader::PARAMETER_OUTPUT)
{
return As<Int>(Extract(oC[var.rel.index].x, 0)) * var.rel.scale;
return As<Int>(Extract(oC[rel.index].x, 0)) * rel.scale;
}
else if(var.rel.type == Shader::PARAMETER_CONST)
else if(rel.type == Shader::PARAMETER_CONST)
{
return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale;
return *Pointer<Int>(uniformAddress(bufferIndex, rel.index)) * rel.scale;
}
else if(var.rel.type == Shader::PARAMETER_LOOP)
else if(rel.type == Shader::PARAMETER_LOOP)
{
return aL[loopDepth];
}
......@@ -1050,6 +1118,30 @@ namespace sw
return 0;
}
// Computes the relative-address offset as an Int4 for an index register that
// may vary between concurrently running shader instances.
//
// rel: describes the register holding the index (type and index), which of
//      its components to read (low two bits of swizzle), and the scale the
//      index is multiplied by.
//
// Returns the selected component, viewed as Int4 through As<Int4>, multiplied
// by rel.scale. Only TEMP, INPUT, OUTPUT and MISCTYPE (VPos / VFace) index
// registers are handled; anything else trips ASSERT(false).
// NOTE(review): As<Int4> looks like a bit reinterpretation rather than a
// float-to-int conversion - confirm against the Reactor API.
Int4 PixelProgram::dynamicAddress(const Shader::Relative &rel)
{
	// Only the lowest two swizzle bits pick the component to read.
	const int channel = rel.swizzle & 0x03;

	Float4 a;

	if(rel.type == Shader::PARAMETER_TEMP)
	{
		a = r[rel.index][channel];
	}
	else if(rel.type == Shader::PARAMETER_INPUT)
	{
		a = v[rel.index][channel];
	}
	else if(rel.type == Shader::PARAMETER_OUTPUT)
	{
		a = oC[rel.index][channel];
	}
	else if(rel.type == Shader::PARAMETER_MISCTYPE)
	{
		// Miscellaneous registers always use their x component,
		// regardless of the swizzle.
		if(rel.index == Shader::VPosIndex)
		{
			a = vPos.x;
		}
		else if(rel.index == Shader::VFaceIndex)
		{
			a = vFace.x;
		}
		else
		{
			ASSERT(false);
		}
	}
	else
	{
		ASSERT(false);
	}

	const Int4 scale(rel.scale);

	return As<Int4>(a) * scale;
}
Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2)
{
Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
......
......@@ -94,7 +94,8 @@ namespace sw
Vector4f readConstant(const Src &src, unsigned int offset = 0);
RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index);
RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index, Int& offset);
Int relativeAddress(const Shader::Parameter &var, int bufferIndex = -1);
Int relativeAddress(const Shader::Relative &rel, int bufferIndex = -1);
Int4 dynamicAddress(const Shader::Relative &rel);
Float4 linearToSRGB(const Float4 &x);
......
......@@ -402,7 +402,7 @@ namespace sw
unsigned int index;
unsigned int swizzle : 8;
unsigned int scale;
bool deterministic; // Equal across shader instances run in lockstep (e.g. unrollable loop counters)
bool dynamic; // Varies between concurrent shader instances
};
struct Parameter
......@@ -433,7 +433,7 @@ namespace sw
rel.index = 0;
rel.swizzle = 0;
rel.scale = 1;
rel.deterministic = false;
rel.dynamic = true;
}
std::string string(ShaderType shaderType, unsigned short version) const;
......
......@@ -70,8 +70,9 @@ namespace sw
Vector4f fetchRegister(const Src &src, unsigned int offset = 0);
Vector4f readConstant(const Src &src, unsigned int offset = 0);
RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index);
RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index, Int& offset);
Int relativeAddress(const Shader::Parameter &var, int bufferIndex = -1);
RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index, Int &offset);
Int relativeAddress(const Shader::Relative &rel, int bufferIndex = -1);
Int4 dynamicAddress(const Shader::Relative &rel);
Int4 enableMask(const Shader::Instruction *instruction);
void M3X2(Vector4f &dst, Vector4f &src0, Src &src1);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment