Commit af1eedb3 by Ben Clayton

SpirvShader: Optimize SIMD sequential, fully-in-bounds loads & stores

For sequential, fully-in-bounds vectors:

* Loads can safely be a regular vector load. We mask just to keep
  behavior consistent with rr::MaskedLoad and rr::Gather.
* Stores of non-atomics can be implemented as a read-modify-write.

These optimizations yield drastic performance improvements on
architectures that have no masked-read and masked-write instructions.

Bug: b/135609394
Change-Id: I552cc38f4aeae73f8db079a0a11da6a8db857710
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/33628
Tested-by: Ben Clayton <bclayton@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
parent 142f4581
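For context on the performance claim: on an ISA without masked-read/masked-write instructions, a masked SIMD access has to be emulated with a test and a scalar access per lane, while the sequential, fully-in-bounds case is a single contiguous access. A minimal scalar model of the two load paths (hypothetical helper names and element offsets; not SwiftShader's Reactor API):

#include <cstdint>
#include <cstring>

constexpr int kWidth = 4; // SwiftShader's SIMD::Width.

// Emulated masked load: a test and a scalar load per lane.
void maskedLoad(const uint32_t* base, const int32_t offsets[kWidth],
                const uint32_t mask[kWidth], uint32_t out[kWidth])
{
	for (int i = 0; i < kWidth; i++)
	{
		out[i] = mask[i] ? base[offsets[i]] : 0; // Inactive lanes read as zero.
	}
}

// Sequential, fully-in-bounds case: one contiguous (vector) load. The AND
// afterwards only preserves the inactive-lanes-are-zero behavior above.
void sequentialLoad(const uint32_t* base, int32_t offset0,
                    const uint32_t mask[kWidth], uint32_t out[kWidth])
{
	std::memcpy(out, base + offset0, kWidth * sizeof(uint32_t)); // Safe: all lanes in bounds.
	for (int i = 0; i < kWidth; i++)
	{
		out[i] &= mask[i];
	}
}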
@@ -290,8 +290,19 @@ namespace sw
 	T Load(Pointer ptr, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
 	{
 		using EL = typename Element<T>::type;
+
+		if (ptr.hasStaticSequentialOffsets(sizeof(float)) &&
+			ptr.isStaticAllInBounds(sizeof(float)))
+		{
+			// All elements sequential and in bounds.
+			// Perform regular load.
+			auto load = rr::Load(rr::Pointer<SIMD::Int>(ptr.base + ptr.staticOffsets[0]), alignment, atomic, order);
+			return As<T>(load & mask); // TODO: Mask here should be unnecessary, but keeps with MaskedLoad and Gather.
+		}
+
 		auto offsets = ptr.offsets();
 		mask &= ptr.isInBounds(sizeof(float)); // Disable OOB reads.
+
 		if (!atomic && order == std::memory_order_relaxed)
 		{
 			if (ptr.hasStaticEqualOffsets())
@@ -351,6 +362,7 @@ namespace sw
 	void Store(Pointer ptr, T val, Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
 	{
 		using EL = typename Element<T>::type;
+		constexpr size_t alignment = sizeof(float);
 		auto offsets = ptr.offsets();
 		mask &= ptr.isInBounds(sizeof(float)); // Disable OOB writes.
 		if (!atomic && order == std::memory_order_relaxed)
@@ -367,16 +379,28 @@ namespace sw
 						Extract(maskedVal, 1) |
 						Extract(maskedVal, 2) |
 						Extract(maskedVal, 3);
-					*rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], sizeof(float)) = As<EL>(scalarVal);
+					*rr::Pointer<EL>(ptr.base + ptr.staticOffsets[0], alignment) = As<EL>(scalarVal);
 				}
-				return;
 			}
-
-			if (ptr.hasStaticSequentialOffsets(sizeof(float)))
+			else if (ptr.hasStaticSequentialOffsets(sizeof(float)))
 			{
-				return rr::MaskedStore(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), val, mask, sizeof(float));
+				if (ptr.isStaticAllInBounds(sizeof(float)))
+				{
+					// Pointer has no elements OOB, and the store is not atomic.
+					// Perform a RMW.
+					auto p = rr::Pointer<SIMD::Int>(ptr.base + ptr.staticOffsets[0], alignment);
+					auto prev = *p;
+					*p = (prev & ~mask) | (As<SIMD::Int>(val) & mask);
+				}
+				else
+				{
+					rr::MaskedStore(rr::Pointer<T>(ptr.base + ptr.staticOffsets[0]), val, mask, alignment);
+				}
 			}
-			return rr::Scatter(rr::Pointer<EL>(ptr.base), val, offsets, mask, sizeof(float));
+			else
+			{
+				rr::Scatter(rr::Pointer<EL>(ptr.base), val, offsets, mask, alignment);
+			}
 		}
 		else
 		{
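The read-modify-write above relies on the pointer being fully in bounds (so loading `prev` cannot fault) and the store being non-atomic. The blend `(prev & ~mask) | (val & mask)` writes the new value to active lanes and rewrites the old bytes to inactive ones. A worked example in plain C++ (invented values):

#include <cassert>
#include <cstdint>

int main()
{
	uint32_t prev[4] = {10, 20, 30, 40}; // Current memory contents.
	uint32_t val[4]  = {1, 2, 3, 4};     // Lanes we want to store.
	uint32_t mask[4] = {0xffffffffu, 0, 0xffffffffu, 0}; // Lanes 0 and 2 active.

	uint32_t out[4];
	for (int i = 0; i < 4; i++)
	{
		// Active lanes take val; inactive lanes keep their previous value.
		out[i] = (prev[i] & ~mask[i]) | (val[i] & mask[i]);
	}

	assert(out[0] == 1 && out[1] == 20 && out[2] == 3 && out[3] == 40);
	return 0;
}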
@@ -385,7 +409,7 @@ namespace sw
 			{
 				// Store all elements in a single SIMD instruction.
 				auto offset = Extract(offsets, 0);
-				Store(val, rr::Pointer<T>(&ptr.base[offset]), sizeof(float), atomic, order);
+				Store(val, rr::Pointer<T>(&ptr.base[offset]), alignment, atomic, order);
 			}
 			Else
 			{
@@ -395,7 +419,7 @@ namespace sw
 				If(Extract(mask, i) != 0)
 				{
 					auto offset = Extract(offsets, i);
-					rr::Store(Extract(val, i), rr::Pointer<EL>(&ptr.base[offset]), sizeof(float), atomic, order);
+					rr::Store(Extract(val, i), rr::Pointer<EL>(&ptr.base[offset]), alignment, atomic, order);
 				}
 			}
 		}
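The hunk above is the atomic/ordered fallback, with its guard condition elided from the diff: one branch stores the whole vector in a single SIMD instruction, the other tests the mask and stores each active lane individually. Roughly, as a scalar sketch (plain C++, hypothetical names; the real guard, not shown here, must establish that the single vector store is permitted):

#include <cstdint>

constexpr int kWidth = 4;

// Scalar sketch of the fallback store paths for atomic/ordered accesses.
void storeFallback(uint32_t* base, const int32_t offsets[kWidth],
                   const uint32_t mask[kWidth], const uint32_t val[kWidth],
                   bool singleVectorStore) // Stand-in for the elided guard.
{
	if (singleVectorStore)
	{
		// Store all elements in a single SIMD instruction.
		for (int i = 0; i < kWidth; i++)
		{
			base[offsets[0] + i] = val[i];
		}
	}
	else
	{
		// Per-lane fallback: test the mask and store each active lane.
		for (int i = 0; i < kWidth; i++)
		{
			if (mask[i] != 0)
			{
				base[offsets[i]] = val[i];
			}
		}
	}
}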
...
@@ -142,14 +142,14 @@ namespace sw
 	{
 		ASSERT(accessSize > 0);
-		if (!hasDynamicOffsets && !hasDynamicLimit)
+		if (isStaticAllInBounds(accessSize))
 		{
-			// Common fast paths.
-			if (hasStaticEqualOffsets())
-			{
-				return SIMD::Int((staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xffffffff : 0);
-			}
+			return SIMD::Int(0xffffffff);
+		}
+
+		if (!hasDynamicOffsets && !hasDynamicLimit)
+		{
+			// Common fast paths.
 			static_assert(SIMD::Width == 4, "Expects SIMD::Width to be 4");
 			return SIMD::Int(
 				(staticOffsets[0] + accessSize - 1 < staticLimit) ? 0xffffffff : 0,
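With the new early-out, a statically in-bounds pointer yields a constant all-ones mask, so the `mask &= ptr.isInBounds(...)` steps in Load and Store above fold away entirely. A quick check of the bounds arithmetic with invented values:

#include <cassert>

int main()
{
	// Invented example: four floats packed back-to-back in a 16-byte buffer.
	const int staticOffsets[4] = {0, 4, 8, 12}; // Byte offsets.
	const int accessSize = 4;                   // sizeof(float)
	const int staticLimit = 16;                 // Buffer size in bytes.

	for (int i = 0; i < 4; i++)
	{
		// The highest byte touched by lane i must stay below the limit; lane 3
		// touches bytes 12..15, so every lane is statically in bounds.
		assert(staticOffsets[i] + accessSize - 1 < staticLimit);
	}
	return 0;
}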
@@ -161,6 +161,22 @@ namespace sw
 		return CmpLT(offsets() + SIMD::Int(accessSize - 1), SIMD::Int(limit()));
 	}
 
+	inline bool isStaticAllInBounds(unsigned int accessSize) const
+	{
+		if (hasDynamicOffsets || hasDynamicLimit)
+		{
+			return false;
+		}
+		for (int i = 0; i < SIMD::Width; i++)
+		{
+			if (staticOffsets[i] + accessSize - 1 >= staticLimit)
+			{
+				return false;
+			}
+		}
+		return true;
+	}
+
 	inline Int limit() const
 	{
 		return dynamicLimit + staticLimit;
...