Commit e4ef5f77 by Chris Forbes

Separate intermediate values from lvalues

The vast majority of values in a SPIR-V program are intermediates -- they are guaranteed to be written exactly once, by the instruction which defines them.

Initially we had treated these the same as mutable (stack) variables, but that produces wasteful code full of loads and stores.

Instead, represent intermediate values as a bundle of RValue<Float4>, representing an rvalue float-sized value per SIMD lane.

Introduce the new type Intermediate to hold these bundles, to allow incremental construction of the individual RValue<Float4> objects within the bundle.

Bug: b/124534397
Change-Id: Ibb663773100d017de117111705b530b092f87ea2
Reviewed-on: https://swiftshader-review.googlesource.com/c/24968
Tested-by: Chris Forbes <chrisforbes@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
parent e205d343
...@@ -488,7 +488,6 @@ namespace sw ...@@ -488,7 +488,6 @@ namespace sw
Int4 SpirvShader::WalkAccessChain(uint32_t id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const Int4 SpirvShader::WalkAccessChain(uint32_t id, uint32_t numIndexes, uint32_t const *indexIds, SpirvRoutine *routine) const
{ {
// TODO: think about decorations, to make this work on location based interfaces
// TODO: think about explicit layout (UBO/SSBO) storage classes // TODO: think about explicit layout (UBO/SSBO) storage classes
// TODO: avoid doing per-lane work in some cases if we can? // TODO: avoid doing per-lane work in some cases if we can?
...@@ -497,8 +496,10 @@ namespace sw ...@@ -497,8 +496,10 @@ namespace sw
auto & baseObject = getObject(id); auto & baseObject = getObject(id);
auto typeId = baseObject.definition.word(1); auto typeId = baseObject.definition.word(1);
// The <base> operand is an intermediate value itself, ie produced by a previous OpAccessChain.
// Start with its offset and build from there.
if (baseObject.kind == Object::Kind::Value) if (baseObject.kind == Object::Kind::Value)
dynamicOffset += As<Int4>(routine->getValue(id)[0]); dynamicOffset += As<Int4>(routine->getIntermediate(id)[0]);
for (auto i = 0u; i < numIndexes; i++) for (auto i = 0u; i < numIndexes; i++)
{ {
...@@ -525,7 +526,7 @@ namespace sw ...@@ -525,7 +526,7 @@ namespace sw
if (obj.kind == Object::Kind::Constant) if (obj.kind == Object::Kind::Constant)
constantOffset += stride * GetConstantInt(indexIds[i]); constantOffset += stride * GetConstantInt(indexIds[i]);
else else
dynamicOffset += Int4(stride) * As<Int4>(routine->getValue(indexIds[i])[0]); dynamicOffset += Int4(stride) * As<Int4>(routine->getIntermediate(indexIds[i])[0]);
break; break;
} }
...@@ -685,7 +686,7 @@ namespace sw ...@@ -685,7 +686,7 @@ namespace sw
auto &object = getObject(insn.word(2)); auto &object = getObject(insn.word(2));
auto &type = getType(insn.word(1)); auto &type = getType(insn.word(1));
auto &pointer = getObject(insn.word(3)); auto &pointer = getObject(insn.word(3));
routine->createLvalue(insn.word(2), type.sizeInComponents); // TODO: this should be an ssavalue! routine->createIntermediate(insn.word(2), type.sizeInComponents);
auto &pointerBase = getObject(pointer.pointerBase); auto &pointerBase = getObject(pointer.pointerBase);
if (pointerBase.storageClass == spv::StorageClassImage || if (pointerBase.storageClass == spv::StorageClassImage ||
...@@ -696,18 +697,18 @@ namespace sw ...@@ -696,18 +697,18 @@ namespace sw
} }
SpirvRoutine::Value& ptrBase = routine->getValue(pointer.pointerBase); SpirvRoutine::Value& ptrBase = routine->getValue(pointer.pointerBase);
auto & dst = routine->getValue(insn.word(2)); auto & dst = routine->getIntermediate(insn.word(2));
if (pointer.kind == Object::Kind::Value) if (pointer.kind == Object::Kind::Value)
{ {
auto offsets = As<Int4>(routine->getValue(insn.word(3))); auto offsets = As<Int4>(routine->getIntermediate(insn.word(3))[0]);
for (auto i = 0u; i < object.sizeInComponents; i++) for (auto i = 0u; i < object.sizeInComponents; i++)
{ {
// i wish i had a Float,Float,Float,Float constructor here.. // i wish i had a Float,Float,Float,Float constructor here..
Float4 v; Float4 v;
for (int j = 0; j < 4; j++) for (int j = 0; j < 4; j++)
v = Insert(v, Extract(ptrBase[Int(i) + Extract(offsets, j)], j), j); v = Insert(v, Extract(ptrBase[Int(i) + Extract(offsets, j)], j), j);
dst[i] = v; dst.emplace(i, v);
} }
} }
else else
...@@ -715,7 +716,7 @@ namespace sw ...@@ -715,7 +716,7 @@ namespace sw
// no divergent offsets to worry about // no divergent offsets to worry about
for (auto i = 0u; i < object.sizeInComponents; i++) for (auto i = 0u; i < object.sizeInComponents; i++)
{ {
dst[i] = ptrBase[i]; dst.emplace(i, ptrBase[i]);
} }
} }
break; break;
...@@ -725,7 +726,7 @@ namespace sw ...@@ -725,7 +726,7 @@ namespace sw
auto &object = getObject(insn.word(2)); auto &object = getObject(insn.word(2));
auto &type = getType(insn.word(1)); auto &type = getType(insn.word(1));
auto &base = getObject(insn.word(3)); auto &base = getObject(insn.word(3));
routine->createLvalue(insn.word(2), type.sizeInComponents); // TODO: this should be an ssavalue! routine->createIntermediate(insn.word(2), type.sizeInComponents);
auto &pointerBase = getObject(object.pointerBase); auto &pointerBase = getObject(object.pointerBase);
assert(type.sizeInComponents == 1); assert(type.sizeInComponents == 1);
assert(base.pointerBase == object.pointerBase); assert(base.pointerBase == object.pointerBase);
...@@ -737,8 +738,8 @@ namespace sw ...@@ -737,8 +738,8 @@ namespace sw
UNIMPLEMENTED("Descriptor-backed OpAccessChain not yet implemented"); UNIMPLEMENTED("Descriptor-backed OpAccessChain not yet implemented");
} }
auto & dst = routine->getValue(insn.word(2)); auto & dst = routine->getIntermediate(insn.word(2));
dst[0] = As<Float4>(WalkAccessChain(insn.word(3), insn.wordCount() - 4, insn.wordPointer(4), routine)); dst.emplace(0, As<Float4>(WalkAccessChain(insn.word(3), insn.wordCount() - 4, insn.wordPointer(4), routine)));
break; break;
} }
case spv::OpStore: case spv::OpStore:
...@@ -755,11 +756,11 @@ namespace sw ...@@ -755,11 +756,11 @@ namespace sw
} }
SpirvRoutine::Value& ptrBase = routine->getValue(pointer.pointerBase); SpirvRoutine::Value& ptrBase = routine->getValue(pointer.pointerBase);
auto & src = routine->getValue(insn.word(2));; auto & src = routine->getIntermediate(insn.word(2));;
if (pointer.kind == Object::Kind::Value) if (pointer.kind == Object::Kind::Value)
{ {
auto offsets = As<Int4>(routine->getValue(insn.word(1))); auto offsets = As<Int4>(routine->getIntermediate(insn.word(1))[0]);
for (auto i = 0u; i < object.sizeInComponents; i++) for (auto i = 0u; i < object.sizeInComponents; i++)
{ {
// Scattered store // Scattered store
......
...@@ -30,24 +30,85 @@ ...@@ -30,24 +30,85 @@
namespace sw namespace sw
{ {
// Incrementally constructed complex bundle of rvalues
// Effectively a restricted vector, supporting only:
// - allocation to a (runtime-known) fixed size
// - in-place construction of elements
// - const operator[]
class Intermediate
{
public:
using Scalar = RValue<Float4>;
Intermediate(uint32_t size) : contents(new ContentsType[size]), size(size) {}
~Intermediate()
{
for (auto i = 0u; i < size; i++)
reinterpret_cast<Scalar *>(&contents[i])->~Scalar();
delete [] contents;
}
void emplace(uint32_t n, Scalar&& value)
{
assert(n < size);
new (&contents[n]) Scalar(value);
}
Scalar const & operator[](uint32_t n) const
{
assert(n < size);
return *reinterpret_cast<Scalar const *>(&contents[n]);
}
// No copy/move construction or assignment
Intermediate(Intermediate const &) = delete;
Intermediate(Intermediate &&) = delete;
Intermediate & operator=(Intermediate const &) = delete;
Intermediate & operator=(Intermediate &&) = delete;
private:
using ContentsType = std::aligned_storage<sizeof(Scalar), alignof(Scalar)>::type;
ContentsType *contents;
uint32_t size;
};
class SpirvRoutine class SpirvRoutine
{ {
public: public:
using Value = Array<Float4>; using Value = Array<Float4>;
std::unordered_map<uint32_t, std::unique_ptr<Value>> lvalues; std::unordered_map<uint32_t, Value> lvalues;
std::unique_ptr<Value> inputs = std::unique_ptr<Value>(new Value(MAX_INTERFACE_COMPONENTS));
std::unique_ptr<Value> outputs = std::unique_ptr<Value>(new Value(MAX_INTERFACE_COMPONENTS)); std::unordered_map<uint32_t, Intermediate> intermediates;
std::unique_ptr<Value> const inputs = std::unique_ptr<Value>(new Value(MAX_INTERFACE_COMPONENTS));
std::unique_ptr<Value> const outputs = std::unique_ptr<Value>(new Value(MAX_INTERFACE_COMPONENTS));
void createLvalue(uint32_t id, uint32_t size) void createLvalue(uint32_t id, uint32_t size)
{ {
lvalues.emplace(id, std::unique_ptr<Value>(new Value(size))); lvalues.emplace(id, Value(size));
}
void createIntermediate(uint32_t id, uint32_t size)
{
intermediates.emplace(std::piecewise_construct,
std::forward_as_tuple(id),
std::forward_as_tuple(size));
} }
Value& getValue(uint32_t id) Value& getValue(uint32_t id)
{ {
auto it = lvalues.find(id); auto it = lvalues.find(id);
assert(it != lvalues.end()); assert(it != lvalues.end());
return *it->second; return it->second;
}
Intermediate& getIntermediate(uint32_t id)
{
auto it = intermediates.find(id);
assert(it != intermediates.end());
return it->second;
} }
}; };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment