Commit 4b74373a by Nicolas Capens Committed by Nicolas Capens

Implement dynamic indexing of temporaries.

Previously only dynamic indexing of uniforms was supported. Since this is essentially a gather operation within the register file, it is slow. We optimize the common case of using the loop index variable as relative address, where the index value would be the same for all shader invocations running in lock-step across SIMD lanes.

Bug chromium:845103
Bug skia:7846
Change-Id: Idb36b512dd560d740ac9088691b633ff3a1561c1
Reviewed-on: https://swiftshader-review.googlesource.com/18968
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
parent 5bff4059
...@@ -1831,6 +1831,11 @@ namespace glsl ...@@ -1831,6 +1831,11 @@ namespace glsl
return false; return false;
} }
if(loop.isDeterministic())
{
deterministicVariables.insert(loop.index->getId());
}
bool unroll = (loop.iterations <= 4); bool unroll = (loop.iterations <= 4);
TIntermNode *init = node->getInit(); TIntermNode *init = node->getInit();
...@@ -1916,6 +1921,11 @@ namespace glsl ...@@ -1916,6 +1921,11 @@ namespace glsl
} }
} }
if(loop.isDeterministic())
{
deterministicVariables.erase(loop.index->getId());
}
return false; return false;
} }
...@@ -2651,10 +2661,12 @@ namespace glsl ...@@ -2651,10 +2661,12 @@ namespace glsl
sw::Shader::SourceParameter relativeRegister; sw::Shader::SourceParameter relativeRegister;
source(relativeRegister, right); source(relativeRegister, right);
int indexId = right->getAsSymbolNode() ? right->getAsSymbolNode()->getId() : 0;
rel.index = relativeRegister.index; rel.index = relativeRegister.index;
rel.type = relativeRegister.type; rel.type = relativeRegister.type;
rel.scale = scale; rel.scale = scale;
rel.deterministic = !(vertexShader && left->getQualifier() == EvqUniform); rel.dynamic = (right->getQualifier() != EvqUniform) && (deterministicVariables.count(indexId) == 0);
} }
} }
else if(rel.index != registerIndex(&address)) // Move the previous index register to the address register else if(rel.index != registerIndex(&address)) // Move the previous index register to the address register
......
...@@ -374,6 +374,8 @@ namespace glsl ...@@ -374,6 +374,8 @@ namespace glsl
TQualifier outputQualifier; TQualifier outputQualifier;
std::set<int> deterministicVariables;
TParseContext &mContext; TParseContext &mContext;
}; };
......
...@@ -1822,6 +1822,16 @@ namespace sw ...@@ -1822,6 +1822,16 @@ namespace sw
// RValue<Bool> operator!=(RValue<Int4> lhs, RValue<Int4> rhs); // RValue<Bool> operator!=(RValue<Int4> lhs, RValue<Int4> rhs);
// RValue<Bool> operator==(RValue<Int4> lhs, RValue<Int4> rhs); // RValue<Bool> operator==(RValue<Int4> lhs, RValue<Int4> rhs);
// Mixed scalar/vector addition with the scalar on the left.
// The scalar operand is first used to construct an Int4 (presumably replicating
// it into every lane -- confirm against the Int4(RValue<Int>) constructor), and
// the sum is then formed by the existing Int4 + Int4 operator.
inline RValue<Int4> operator+(RValue<Int> lhs, RValue<Int4> rhs)
{
	Int4 widened(lhs);

	return widened + rhs;
}
// Mixed vector/scalar addition with the scalar on the right.
// The scalar operand is first used to construct an Int4 (presumably replicating
// it into every lane -- confirm against the Int4(RValue<Int>) constructor), and
// the sum is then formed by the existing Int4 + Int4 operator.
inline RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int> rhs)
{
	Int4 widened(rhs);

	return lhs + widened;
}
RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y); RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y);
RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y); RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y);
RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y); RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y);
......
...@@ -366,14 +366,23 @@ namespace sw ...@@ -366,14 +366,23 @@ namespace sw
if(dst.z) pDst.z = r[dst.index].z; if(dst.z) pDst.z = r[dst.index].z;
if(dst.w) pDst.w = r[dst.index].w; if(dst.w) pDst.w = r[dst.index].w;
} }
else if(!dst.rel.dynamic)
{
Int a = dst.index + relativeAddress(dst.rel);
if(dst.x) pDst.x = r[a].x;
if(dst.y) pDst.y = r[a].y;
if(dst.z) pDst.z = r[a].z;
if(dst.w) pDst.w = r[a].w;
}
else else
{ {
Int a = relativeAddress(dst); Int4 a = dst.index + dynamicAddress(dst.rel);
if(dst.x) pDst.x = r[dst.index + a].x; if(dst.x) pDst.x = r[a].x;
if(dst.y) pDst.y = r[dst.index + a].y; if(dst.y) pDst.y = r[a].y;
if(dst.z) pDst.z = r[dst.index + a].z; if(dst.z) pDst.z = r[a].z;
if(dst.w) pDst.w = r[dst.index + a].w; if(dst.w) pDst.w = r[a].w;
} }
break; break;
case Shader::PARAMETER_COLOROUT: case Shader::PARAMETER_COLOROUT:
...@@ -384,9 +393,18 @@ namespace sw ...@@ -384,9 +393,18 @@ namespace sw
if(dst.z) pDst.z = oC[dst.index].z; if(dst.z) pDst.z = oC[dst.index].z;
if(dst.w) pDst.w = oC[dst.index].w; if(dst.w) pDst.w = oC[dst.index].w;
} }
else if(!dst.rel.dynamic)
{
Int a = dst.index + relativeAddress(dst.rel);
if(dst.x) pDst.x = oC[a].x;
if(dst.y) pDst.y = oC[a].y;
if(dst.z) pDst.z = oC[a].z;
if(dst.w) pDst.w = oC[a].w;
}
else else
{ {
Int a = relativeAddress(dst) + dst.index; Int4 a = dst.index + dynamicAddress(dst.rel);
if(dst.x) pDst.x = oC[a].x; if(dst.x) pDst.x = oC[a].x;
if(dst.y) pDst.y = oC[a].y; if(dst.y) pDst.y = oC[a].y;
...@@ -460,14 +478,23 @@ namespace sw ...@@ -460,14 +478,23 @@ namespace sw
if(dst.z) r[dst.index].z = d.z; if(dst.z) r[dst.index].z = d.z;
if(dst.w) r[dst.index].w = d.w; if(dst.w) r[dst.index].w = d.w;
} }
else if(!dst.rel.dynamic)
{
Int a = dst.index + relativeAddress(dst.rel);
if(dst.x) r[a].x = d.x;
if(dst.y) r[a].y = d.y;
if(dst.z) r[a].z = d.z;
if(dst.w) r[a].w = d.w;
}
else else
{ {
Int a = relativeAddress(dst); Int4 a = dst.index + dynamicAddress(dst.rel);
if(dst.x) r[dst.index + a].x = d.x; if(dst.x) r.scatter_x(a, d.x);
if(dst.y) r[dst.index + a].y = d.y; if(dst.y) r.scatter_y(a, d.y);
if(dst.z) r[dst.index + a].z = d.z; if(dst.z) r.scatter_z(a, d.z);
if(dst.w) r[dst.index + a].w = d.w; if(dst.w) r.scatter_w(a, d.w);
} }
break; break;
case Shader::PARAMETER_COLOROUT: case Shader::PARAMETER_COLOROUT:
...@@ -475,20 +502,30 @@ namespace sw ...@@ -475,20 +502,30 @@ namespace sw
{ {
broadcastColor0 = (dst.index == 0) && broadcastColor0; broadcastColor0 = (dst.index == 0) && broadcastColor0;
if(dst.x) { oC[dst.index].x = d.x; } if(dst.x) oC[dst.index].x = d.x;
if(dst.y) { oC[dst.index].y = d.y; } if(dst.y) oC[dst.index].y = d.y;
if(dst.z) { oC[dst.index].z = d.z; } if(dst.z) oC[dst.index].z = d.z;
if(dst.w) { oC[dst.index].w = d.w; } if(dst.w) oC[dst.index].w = d.w;
}
else if(!dst.rel.dynamic)
{
broadcastColor0 = false;
Int a = dst.index + relativeAddress(dst.rel);
if(dst.x) oC[a].x = d.x;
if(dst.y) oC[a].y = d.y;
if(dst.z) oC[a].z = d.z;
if(dst.w) oC[a].w = d.w;
} }
else else
{ {
broadcastColor0 = false; broadcastColor0 = false;
Int a = relativeAddress(dst) + dst.index; Int4 a = dst.index + dynamicAddress(dst.rel);
if(dst.x) { oC[a].x = d.x; } if(dst.x) oC.scatter_x(a, d.x);
if(dst.y) { oC[a].y = d.y; } if(dst.y) oC.scatter_y(a, d.y);
if(dst.z) { oC[a].z = d.z; } if(dst.z) oC.scatter_z(a, d.z);
if(dst.w) { oC[a].w = d.w; } if(dst.w) oC.scatter_w(a, d.w);
} }
break; break;
case Shader::PARAMETER_PREDICATE: case Shader::PARAMETER_PREDICATE:
...@@ -826,25 +863,27 @@ namespace sw ...@@ -826,25 +863,27 @@ namespace sw
{ {
reg = r[i]; reg = r[i];
} }
else if(!src.rel.dynamic)
{
reg = r[i + relativeAddress(src.rel, src.bufferIndex)];
}
else else
{ {
Int a = relativeAddress(src, src.bufferIndex); reg = r[i + dynamicAddress(src.rel)];
reg = r[i + a];
} }
break; break;
case Shader::PARAMETER_INPUT: case Shader::PARAMETER_INPUT:
if(src.rel.type == Shader::PARAMETER_VOID) // Not relative
{ {
if(src.rel.type == Shader::PARAMETER_VOID) // Not relative reg = v[i];
{ }
reg = v[i]; else if(!src.rel.dynamic)
} {
else reg = v[i + relativeAddress(src.rel, src.bufferIndex)];
{ }
Int a = relativeAddress(src, src.bufferIndex); else
{
reg = v[i + a]; reg = v[i + dynamicAddress(src.rel)];
}
} }
break; break;
case Shader::PARAMETER_CONST: case Shader::PARAMETER_CONST:
...@@ -883,11 +922,13 @@ namespace sw ...@@ -883,11 +922,13 @@ namespace sw
{ {
reg = oC[i]; reg = oC[i];
} }
else if(!src.rel.dynamic)
{
reg = oC[i + relativeAddress(src.rel, src.bufferIndex)];
}
else else
{ {
Int a = relativeAddress(src, src.bufferIndex); reg = oC[i + dynamicAddress(src.rel)];
reg = oC[i + a];
} }
break; break;
case Shader::PARAMETER_DEPTHOUT: case Shader::PARAMETER_DEPTHOUT:
...@@ -995,11 +1036,11 @@ namespace sw ...@@ -995,11 +1036,11 @@ namespace sw
} }
} }
} }
else if(src.rel.type == Shader::PARAMETER_LOOP) else if(!src.rel.dynamic || src.rel.type == Shader::PARAMETER_LOOP)
{ {
Int loopCounter = aL[loopDepth]; Int a = relativeAddress(src.rel, src.bufferIndex);
c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter)); c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
c.x = c.x.xxxx; c.x = c.x.xxxx;
c.y = c.y.yyyy; c.y = c.y.yyyy;
...@@ -1008,40 +1049,67 @@ namespace sw ...@@ -1008,40 +1049,67 @@ namespace sw
} }
else else
{ {
Int a = relativeAddress(src, src.bufferIndex); int component = src.rel.swizzle & 0x03;
Float4 a;
c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a)); switch(src.rel.type)
{
case Shader::PARAMETER_TEMP: a = r[src.rel.index][component]; break;
case Shader::PARAMETER_INPUT: a = v[src.rel.index][component]; break;
case Shader::PARAMETER_OUTPUT: a = oC[src.rel.index][component]; break;
case Shader::PARAMETER_CONST: a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
case Shader::PARAMETER_MISCTYPE:
switch(src.rel.index)
{
case Shader::VPosIndex: a = vPos.x; break;
case Shader::VFaceIndex: a = vFace.x; break;
default: ASSERT(false);
}
break;
default: ASSERT(false);
}
c.x = c.x.xxxx; Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale);
c.y = c.y.yyyy;
c.z = c.z.zzzz; index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS)); // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}
c.w = c.w.wwww;
Int index0 = Extract(index, 0);
Int index1 = Extract(index, 1);
Int index2 = Extract(index, 2);
Int index3 = Extract(index, 3);
c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16);
c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16);
c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16);
c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16);
transpose4x4(c.x, c.y, c.z, c.w);
} }
return c; return c;
} }
Int PixelProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex) Int PixelProgram::relativeAddress(const Shader::Relative &rel, int bufferIndex)
{ {
ASSERT(var.rel.deterministic); ASSERT(!rel.dynamic);
if(var.rel.type == Shader::PARAMETER_TEMP) if(rel.type == Shader::PARAMETER_TEMP)
{ {
return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale; return As<Int>(Extract(r[rel.index].x, 0)) * rel.scale;
} }
else if(var.rel.type == Shader::PARAMETER_INPUT) else if(rel.type == Shader::PARAMETER_INPUT)
{ {
return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale; return As<Int>(Extract(v[rel.index].x, 0)) * rel.scale;
} }
else if(var.rel.type == Shader::PARAMETER_OUTPUT) else if(rel.type == Shader::PARAMETER_OUTPUT)
{ {
return As<Int>(Extract(oC[var.rel.index].x, 0)) * var.rel.scale; return As<Int>(Extract(oC[rel.index].x, 0)) * rel.scale;
} }
else if(var.rel.type == Shader::PARAMETER_CONST) else if(rel.type == Shader::PARAMETER_CONST)
{ {
return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale; return *Pointer<Int>(uniformAddress(bufferIndex, rel.index)) * rel.scale;
} }
else if(var.rel.type == Shader::PARAMETER_LOOP) else if(rel.type == Shader::PARAMETER_LOOP)
{ {
return aL[loopDepth]; return aL[loopDepth];
} }
...@@ -1050,6 +1118,30 @@ namespace sw ...@@ -1050,6 +1118,30 @@ namespace sw
return 0; return 0;
} }
// Computes a four-wide relative register offset for an index that is flagged
// as dynamic (see the ASSERT(!rel.dynamic) in relativeAddress for the
// complementary, single-offset path).
//
// rel - relative-addressing descriptor; its type and index select the register
//       holding the index value, the low two bits of its swizzle select the
//       component that is read, and its scale multiplies the result.
//
// Returns the selected component reinterpreted through As<Int4> and multiplied
// by rel.scale. Unsupported register types or misc indices trip ASSERT(false).
Int4 PixelProgram::dynamicAddress(const Shader::Relative &rel)
{
	// Only the two least significant swizzle bits are consulted here.
	const int channel = rel.swizzle & 0x03;

	Float4 offsets;

	if(rel.type == Shader::PARAMETER_TEMP)
	{
		offsets = r[rel.index][channel];
	}
	else if(rel.type == Shader::PARAMETER_INPUT)
	{
		offsets = v[rel.index][channel];
	}
	else if(rel.type == Shader::PARAMETER_OUTPUT)
	{
		offsets = oC[rel.index][channel];
	}
	else if(rel.type == Shader::PARAMETER_MISCTYPE)
	{
		// Misc registers always contribute their x component, regardless of swizzle.
		switch(rel.index)
		{
		case Shader::VPosIndex:
			offsets = vPos.x;
			break;
		case Shader::VFaceIndex:
			offsets = vFace.x;
			break;
		default:
			ASSERT(false);
		}
	}
	else
	{
		ASSERT(false);
	}

	// The register contents are reinterpreted (not converted) as integers via As<Int4>.
	return As<Int4>(offsets) * Int4(rel.scale);
}
Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2) Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2)
{ {
Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x)); Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
......
...@@ -94,7 +94,8 @@ namespace sw ...@@ -94,7 +94,8 @@ namespace sw
Vector4f readConstant(const Src &src, unsigned int offset = 0); Vector4f readConstant(const Src &src, unsigned int offset = 0);
RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index); RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index);
RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index, Int& offset); RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index, Int& offset);
Int relativeAddress(const Shader::Parameter &var, int bufferIndex = -1); Int relativeAddress(const Shader::Relative &rel, int bufferIndex = -1);
Int4 dynamicAddress(const Shader::Relative &rel);
Float4 linearToSRGB(const Float4 &x); Float4 linearToSRGB(const Float4 &x);
......
...@@ -402,7 +402,7 @@ namespace sw ...@@ -402,7 +402,7 @@ namespace sw
unsigned int index; unsigned int index;
unsigned int swizzle : 8; unsigned int swizzle : 8;
unsigned int scale; unsigned int scale;
bool deterministic; // Equal accross shader instances run in lockstep (e.g. unrollable loop couters) bool dynamic; // Varies between concurrent shader instances
}; };
struct Parameter struct Parameter
...@@ -433,7 +433,7 @@ namespace sw ...@@ -433,7 +433,7 @@ namespace sw
rel.index = 0; rel.index = 0;
rel.swizzle = 0; rel.swizzle = 0;
rel.scale = 1; rel.scale = 1;
rel.deterministic = false; rel.dynamic = true;
} }
std::string string(ShaderType shaderType, unsigned short version) const; std::string string(ShaderType shaderType, unsigned short version) const;
......
...@@ -70,8 +70,9 @@ namespace sw ...@@ -70,8 +70,9 @@ namespace sw
Vector4f fetchRegister(const Src &src, unsigned int offset = 0); Vector4f fetchRegister(const Src &src, unsigned int offset = 0);
Vector4f readConstant(const Src &src, unsigned int offset = 0); Vector4f readConstant(const Src &src, unsigned int offset = 0);
RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index); RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index);
RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index, Int& offset); RValue<Pointer<Byte>> uniformAddress(int bufferIndex, unsigned int index, Int &offset);
Int relativeAddress(const Shader::Parameter &var, int bufferIndex = -1); Int relativeAddress(const Shader::Relative &rel, int bufferIndex = -1);
Int4 dynamicAddress(const Shader::Relative &rel);
Int4 enableMask(const Shader::Instruction *instruction); Int4 enableMask(const Shader::Instruction *instruction);
void M3X2(Vector4f &dst, Vector4f &src0, Src &src1); void M3X2(Vector4f &dst, Vector4f &src0, Src &src1);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment